diff --git a/src/Veldrid.MetalBindings/MTLComputeCommandEncoder.cs b/src/Veldrid.MetalBindings/MTLComputeCommandEncoder.cs index f61c488a2..422001fbc 100644 --- a/src/Veldrid.MetalBindings/MTLComputeCommandEncoder.cs +++ b/src/Veldrid.MetalBindings/MTLComputeCommandEncoder.cs @@ -15,6 +15,7 @@ public struct MTLComputeCommandEncoder private static readonly Selector sel_endEncoding = "endEncoding"; private static readonly Selector sel_setTexture = "setTexture:atIndex:"; private static readonly Selector sel_setSamplerState = "setSamplerState:atIndex:"; + private static readonly Selector sel_setBytes = "setBytes:length:atIndex:"; public void setComputePipelineState(MTLComputePipelineState state) => objc_msgSend(NativePtr, sel_setComputePipelineState, state.NativePtr); @@ -25,6 +26,9 @@ public void setBuffer(MTLBuffer buffer, UIntPtr offset, UIntPtr index) offset, index); + public unsafe void setBytes(void* bytes, UIntPtr length, UIntPtr index) + => objc_msgSend(NativePtr, sel_setBytes, bytes, length, index); + public void dispatchThreadGroups(MTLSize threadgroupsPerGrid, MTLSize threadsPerThreadgroup) => objc_msgSend(NativePtr, sel_dispatchThreadgroups0, threadgroupsPerGrid, threadsPerThreadgroup); diff --git a/src/Veldrid.MetalBindings/MTLComputePipelineState.cs b/src/Veldrid.MetalBindings/MTLComputePipelineState.cs index c2b390c63..cd68830f2 100644 --- a/src/Veldrid.MetalBindings/MTLComputePipelineState.cs +++ b/src/Veldrid.MetalBindings/MTLComputePipelineState.cs @@ -6,5 +6,6 @@ public struct MTLComputePipelineState { public readonly IntPtr NativePtr; public MTLComputePipelineState(IntPtr ptr) => NativePtr = ptr; + public bool IsNull => NativePtr == IntPtr.Zero; } } \ No newline at end of file diff --git a/src/Veldrid.MetalBindings/ObjectiveCRuntime.cs b/src/Veldrid.MetalBindings/ObjectiveCRuntime.cs index 03c8d1c59..9c22ce0e7 100644 --- a/src/Veldrid.MetalBindings/ObjectiveCRuntime.cs +++ b/src/Veldrid.MetalBindings/ObjectiveCRuntime.cs @@ -31,6 +31,8 @@ public static unsafe class ObjectiveCRuntime [DllImport(ObjCLibrary, EntryPoint = "objc_msgSend")] public static extern void objc_msgSend(IntPtr receiver, Selector selector, IntPtr a, UIntPtr b, UIntPtr c); [DllImport(ObjCLibrary, EntryPoint = "objc_msgSend")] + public static extern void objc_msgSend(IntPtr receiver, Selector selector, void* a, UIntPtr b, UIntPtr c); + [DllImport(ObjCLibrary, EntryPoint = "objc_msgSend")] public static extern void objc_msgSend(IntPtr receiver, Selector selector, MTLPrimitiveType a, UIntPtr b, UIntPtr c, UIntPtr d, UIntPtr e); [DllImport(ObjCLibrary, EntryPoint = "objc_msgSend")] public static extern void objc_msgSend(IntPtr receiver, Selector selector, NSRange a); diff --git a/src/Veldrid.Tests/BufferTests.cs b/src/Veldrid.Tests/BufferTests.cs index 0b0badf48..50d639436 100644 --- a/src/Veldrid.Tests/BufferTests.cs +++ b/src/Veldrid.Tests/BufferTests.cs @@ -303,10 +303,108 @@ public void CommandList_Update_Staging() } } + [Theory] + [InlineData( + 60, BufferUsage.VertexBuffer, 1, + 70, BufferUsage.VertexBuffer, 13, + 11)] + [InlineData( + 60, BufferUsage.Staging, 1, + 70, BufferUsage.VertexBuffer, 13, + 11)] + [InlineData( + 60, BufferUsage.VertexBuffer, 1, + 70, BufferUsage.Staging, 13, + 11)] + [InlineData( + 60, BufferUsage.Staging, 1, + 70, BufferUsage.Staging, 13, + 11)] + [InlineData( + 5, BufferUsage.VertexBuffer, 3, + 10, BufferUsage.VertexBuffer, 7, + 2)] + public void Copy_UnalignedRegion( + uint srcBufferSize, BufferUsage srcUsage, uint srcCopyOffset, + uint dstBufferSize, BufferUsage dstUsage, uint dstCopyOffset, + uint copySize) + { + DeviceBuffer src = CreateBuffer(srcBufferSize, srcUsage); + DeviceBuffer dst = CreateBuffer(dstBufferSize, dstUsage); + + byte[] data = Enumerable.Range(0, (int)srcBufferSize).Select(i => (byte)i).ToArray(); + GD.UpdateBuffer(src, 0, data); + + CommandList cl = RF.CreateCommandList(); + cl.Begin(); + cl.CopyBuffer(src, srcCopyOffset, dst, dstCopyOffset, copySize); + cl.End(); + + GD.SubmitCommands(cl); + GD.WaitForIdle(); + + DeviceBuffer readback = GetReadback(dst); + + MappedResourceView readView = GD.Map(readback, MapMode.Read); + for (uint i = 0; i < copySize; i++) + { + byte expected = data[i + srcCopyOffset]; + byte actual = readView[i + dstCopyOffset]; + Assert.Equal(expected, actual); + } + GD.Unmap(readback); + } + + [Theory] + [InlineData(BufferUsage.VertexBuffer, 13, 5, 1)] + [InlineData(BufferUsage.Staging, 13, 5, 1)] + public void CommandList_UpdateNonStaging_Unaligned(BufferUsage usage, uint bufferSize, uint dataSize, uint offset) + { + DeviceBuffer buffer = CreateBuffer(bufferSize, usage); + byte[] data = Enumerable.Range(0, (int)dataSize).Select(i => (byte)i).ToArray(); + CommandList cl = RF.CreateCommandList(); + cl.Begin(); + cl.UpdateBuffer(buffer, offset, data); + cl.End(); + GD.SubmitCommands(cl); + GD.WaitForIdle(); + + DeviceBuffer readback = GetReadback(buffer); + MappedResourceView readView = GD.Map(readback, MapMode.Read); + for (uint i = 0; i < dataSize; i++) + { + byte expected = data[i]; + byte actual = readView[i + offset]; + Assert.Equal(expected, actual); + } + GD.Unmap(readback); + } + private DeviceBuffer CreateBuffer(uint size, BufferUsage usage) { return RF.CreateBuffer(new BufferDescription(size, usage)); } + + private DeviceBuffer GetReadback(DeviceBuffer buffer) + { + DeviceBuffer readback; + if ((buffer.Usage & BufferUsage.Staging) != 0) + { + readback = buffer; + } + else + { + readback = CreateBuffer(buffer.SizeInBytes, BufferUsage.Staging); + CommandList cl = RF.CreateCommandList(); + cl.Begin(); + cl.CopyBuffer(buffer, 0, readback, 0, buffer.SizeInBytes); + cl.End(); + GD.SubmitCommands(cl); + GD.WaitForIdle(); + } + + return readback; + } } public class OpenGLBufferTests : BufferTestBase { } diff --git a/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_iOS.metallib b/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_iOS.metallib new file mode 100644 index 000000000..10451a6d4 Binary files /dev/null and b/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_iOS.metallib differ diff --git a/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_macOS.metallib b/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_macOS.metallib new file mode 100644 index 000000000..a7cd68e78 Binary files /dev/null and b/src/Veldrid/MTL/EmbeddedShaders/MTL_UnalignedBufferCopy_macOS.metallib differ diff --git a/src/Veldrid/MTL/EmbeddedShaders/UnalignedBufferCopy.metal b/src/Veldrid/MTL/EmbeddedShaders/UnalignedBufferCopy.metal new file mode 100644 index 000000000..2c74667ba --- /dev/null +++ b/src/Veldrid/MTL/EmbeddedShaders/UnalignedBufferCopy.metal @@ -0,0 +1,21 @@ +#include +using namespace metal; + +// This must be kept in sync with MTLUnalignedBufferCopyInfo.cs +struct CopyInfo +{ + uint32_t SrcOffset; + uint32_t DstOffset; + uint32_t CopySize; +}; + +kernel void copy_bytes( + device uint8_t* src [[ buffer(0) ]], + device uint8_t* dst [[ buffer(1) ]], + constant CopyInfo& info [[ buffer(2) ]]) +{ + for (uint32_t i = 0; i < info.CopySize; i++) + { + dst[i + info.DstOffset] = src[i + info.SrcOffset]; + } +} \ No newline at end of file diff --git a/src/Veldrid/MTL/EmbeddedShaders/recompile-shaders.sh b/src/Veldrid/MTL/EmbeddedShaders/recompile-shaders.sh new file mode 100755 index 000000000..501d44f57 --- /dev/null +++ b/src/Veldrid/MTL/EmbeddedShaders/recompile-shaders.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +xcrun -sdk macosx metal $script_dir/UnalignedBufferCopy.metal -o $script_dir/UnalignedBufferCopy.macos.air +xcrun -sdk macosx metallib $script_dir/UnalignedBufferCopy.macos.air -o $script_dir/MTL_UnalignedBufferCopy_macOS.metallib + +xcrun -sdk iphoneos metal $script_dir/UnalignedBufferCopy.metal -o $script_dir/UnalignedBufferCopy.ios.air +xcrun -sdk iphoneos metallib $script_dir/UnalignedBufferCopy.ios.air -o $script_dir/MTL_UnalignedBufferCopy_iOS.metallib + +rm $script_dir/UnalignedBufferCopy.macos.air +rm $script_dir/UnalignedBufferCopy.ios.air diff --git a/src/Veldrid/MTL/MTLCommandList.cs b/src/Veldrid/MTL/MTLCommandList.cs index a8c001c55..bcd48127b 100644 --- a/src/Veldrid/MTL/MTLCommandList.cs +++ b/src/Veldrid/MTL/MTLCommandList.cs @@ -222,6 +222,7 @@ private void PreComputeCommand() public override void End() { EnsureNoBlitEncoder(); + EnsureNoComputeEncoder(); if (!_currentFramebufferEverActive && _mtlFramebuffer != null) { @@ -280,29 +281,30 @@ public override void SetViewport(uint index, ref Viewport viewport) public override void UpdateBuffer(DeviceBuffer buffer, uint bufferOffsetInBytes, IntPtr source, uint sizeInBytes) { - if (bufferOffsetInBytes % 4 != 0) - { - throw new VeldridException("Metal needs 4-byte-multiple buffer copy size and offset."); - } - if (sizeInBytes % 4 != 0 && bufferOffsetInBytes != 0 && sizeInBytes != buffer.SizeInBytes) - { - throw new VeldridException("Metal needs 4-byte-multiple buffer copy size and offset."); - } - - Debug.Assert(bufferOffsetInBytes % 4 == 0); - - uint sizeRoundFactor = (4 - (sizeInBytes % 4)) % 4; + bool useComputeCopy = (bufferOffsetInBytes % 4 != 0) + || (sizeInBytes % 4 != 0 && bufferOffsetInBytes != 0 && sizeInBytes != buffer.SizeInBytes); MTLBuffer dstMTLBuffer = Util.AssertSubtype(buffer); // TODO: Cache these, and rely on the command buffer's completion callback to add them back to a shared pool. MTLBuffer copySrc = Util.AssertSubtype( _gd.ResourceFactory.CreateBuffer(new BufferDescription(sizeInBytes, BufferUsage.Staging))); _gd.UpdateBuffer(copySrc, 0, source, sizeInBytes); - EnsureBlitEncoder(); - _bce.copy( - copySrc.DeviceBuffer, UIntPtr.Zero, - dstMTLBuffer.DeviceBuffer, (UIntPtr)bufferOffsetInBytes, - (UIntPtr)(sizeInBytes + sizeRoundFactor)); + + if (useComputeCopy) + { + CopyBufferCore(copySrc, 0, buffer, bufferOffsetInBytes, sizeInBytes); + } + else + { + Debug.Assert(bufferOffsetInBytes % 4 == 0); + uint sizeRoundFactor = (4 - (sizeInBytes % 4)) % 4; + EnsureBlitEncoder(); + _bce.copy( + copySrc.DeviceBuffer, UIntPtr.Zero, + dstMTLBuffer.DeviceBuffer, (UIntPtr)bufferOffsetInBytes, + (UIntPtr)(sizeInBytes + sizeRoundFactor)); + } + copySrc.Dispose(); } @@ -313,18 +315,33 @@ protected override void CopyBufferCore( uint destinationOffset, uint sizeInBytes) { + MTLBuffer mtlSrc = Util.AssertSubtype(source); + MTLBuffer mtlDst = Util.AssertSubtype(destination); + if (sourceOffset % 4 != 0 || sizeInBytes % 4 != 0) { - throw new NotImplementedException("Metal needs 4-byte-multiple buffer copy size and offset."); - } + // Unaligned copy -- use special compute shader. + EnsureComputeEncoder(); + _cce.setComputePipelineState(_gd.GetUnalignedBufferCopyPipeline()); + _cce.setBuffer(mtlSrc.DeviceBuffer, UIntPtr.Zero, (UIntPtr)0); + _cce.setBuffer(mtlDst.DeviceBuffer, UIntPtr.Zero, (UIntPtr)1); - EnsureBlitEncoder(); - MTLBuffer mtlSrc = Util.AssertSubtype(source); - MTLBuffer mtlDst = Util.AssertSubtype(destination); - _bce.copy( - mtlSrc.DeviceBuffer, (UIntPtr)sourceOffset, - mtlDst.DeviceBuffer, (UIntPtr)destinationOffset, - (UIntPtr)sizeInBytes); + MTLUnalignedBufferCopyInfo copyInfo; + copyInfo.SourceOffset = sourceOffset; + copyInfo.DestinationOffset = destinationOffset; + copyInfo.CopySize = sizeInBytes; + + _cce.setBytes(©Info, (UIntPtr)sizeof(MTLUnalignedBufferCopyInfo), (UIntPtr)2); + _cce.dispatchThreadGroups(new MTLSize(1, 1, 1), new MTLSize(1, 1, 1)); + } + else + { + EnsureBlitEncoder(); + _bce.copy( + mtlSrc.DeviceBuffer, (UIntPtr)sourceOffset, + mtlDst.DeviceBuffer, (UIntPtr)destinationOffset, + (UIntPtr)sizeInBytes); + } } protected override void CopyTextureCore( diff --git a/src/Veldrid/MTL/MTLGraphicsDevice.cs b/src/Veldrid/MTL/MTLGraphicsDevice.cs index f37c95da9..dce6e5062 100644 --- a/src/Veldrid/MTL/MTLGraphicsDevice.cs +++ b/src/Veldrid/MTL/MTLGraphicsDevice.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.IO; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Threading; @@ -21,6 +22,12 @@ internal unsafe class MTLGraphicsDevice : GraphicsDevice private readonly object _resetEventsLock = new object(); private readonly List _resetEvents = new List(); + private const string UnalignedBufferCopyPipelineMacOSName = "MTL_UnalignedBufferCopy_macOS"; + private const string UnalignedBufferCopyPipelineiOSName = "MTL_UnalignedBufferCopy_iOS"; + private readonly object _unalignedBufferCopyPipelineLock = new object(); + private MTLShader _unalignedBufferCopyShader; + private MTLComputePipelineState _unalignedBufferCopyPipeline; + public MTLDevice Device => _device; public MTLCommandQueue CommandQueue => _commandQueue; public MTLFeatureSupport Features { get; } @@ -225,6 +232,11 @@ private MappedResource MapTexture(MTLTexture texture, MapMode mode, uint subreso protected override void PlatformDispose() { WaitForIdle(); + if (!_unalignedBufferCopyPipeline.IsNull) + { + _unalignedBufferCopyShader.Dispose(); + ObjectiveCRuntime.release(_unalignedBufferCopyPipeline.NativePtr); + } _mainSwapchain?.Dispose(); ObjectiveCRuntime.release(_commandQueue.NativePtr); ObjectiveCRuntime.release(_device.NativePtr); @@ -294,5 +306,40 @@ public override void ResetFence(Fence fence) { Util.AssertSubtype(fence).Reset(); } + + internal MTLComputePipelineState GetUnalignedBufferCopyPipeline() + { + lock (_unalignedBufferCopyPipelineLock) + { + if (_unalignedBufferCopyPipeline.IsNull) + { + MTLComputePipelineDescriptor descriptor = MTLUtil.AllocInit( + nameof(MTLComputePipelineDescriptor)); + MTLPipelineBufferDescriptor buffer0 = descriptor.buffers[0]; + buffer0.mutability = MTLMutability.Mutable; + MTLPipelineBufferDescriptor buffer1 = descriptor.buffers[1]; + buffer0.mutability = MTLMutability.Mutable; + + Debug.Assert(_unalignedBufferCopyShader == null); + string name = Features.IsMacOS ? UnalignedBufferCopyPipelineMacOSName : UnalignedBufferCopyPipelineiOSName; + using (Stream resourceStream = typeof(MTLGraphicsDevice).Assembly.GetManifestResourceStream(name)) + { + byte[] data = new byte[resourceStream.Length]; + using (MemoryStream ms = new MemoryStream(data)) + { + resourceStream.CopyTo(ms); + ShaderDescription shaderDesc = new ShaderDescription(ShaderStages.Compute, data, "copy_bytes"); + _unalignedBufferCopyShader = new MTLShader(ref shaderDesc, this); + } + } + + descriptor.computeFunction = _unalignedBufferCopyShader.Function; + _unalignedBufferCopyPipeline = _device.newComputePipelineStateWithDescriptor(descriptor); + ObjectiveCRuntime.release(descriptor.NativePtr); + } + + return _unalignedBufferCopyPipeline; + } + } } } diff --git a/src/Veldrid/MTL/MTLUnalignedBufferCopyInfo.cs b/src/Veldrid/MTL/MTLUnalignedBufferCopyInfo.cs new file mode 100644 index 000000000..d0a23f96e --- /dev/null +++ b/src/Veldrid/MTL/MTLUnalignedBufferCopyInfo.cs @@ -0,0 +1,9 @@ +namespace Veldrid.MTL +{ + internal struct MTLUnalignedBufferCopyInfo + { + public uint SourceOffset; + public uint DestinationOffset; + public uint CopySize; + } +} \ No newline at end of file diff --git a/src/Veldrid/ResourceFactory.cs b/src/Veldrid/ResourceFactory.cs index 10a60e44a..60bade41d 100644 --- a/src/Veldrid/ResourceFactory.cs +++ b/src/Veldrid/ResourceFactory.cs @@ -165,6 +165,10 @@ public DeviceBuffer CreateBuffer(ref BufferDescription description) { throw new VeldridException("Buffers with Staging Usage must not specify any other Usage flags."); } + if ((description.Usage & BufferUsage.UniformBuffer) != 0 && (description.SizeInBytes % 16) != 0) + { + throw new VeldridException($"Uniform buffer size must be a multiple of 16 bytes."); + } #endif return CreateBufferCore(ref description); } diff --git a/src/Veldrid/Veldrid.csproj b/src/Veldrid/Veldrid.csproj index d59701a67..276e7331e 100644 --- a/src/Veldrid/Veldrid.csproj +++ b/src/Veldrid/Veldrid.csproj @@ -36,6 +36,15 @@ + + + MTL_UnalignedBufferCopy_macOS + + + MTL_UnalignedBufferCopy_iOS + + + A low-level, hardware-accelerated graphics and compute library for .NET, with backends for Direct3D 11, Vulkan, and OpenGL. Veldrid can be used to create high-performance 2D and 3D games, simulations, tools, and other graphical applications. 3D Graphics Direct3D DirectX Vulkan OpenGL Metal Core Standard Game