From 2b24036a13dddca636c158e00df99a18b373b574 Mon Sep 17 00:00:00 2001 From: doyoung-gwak Date: Sat, 13 Mar 2021 01:49:10 +0900 Subject: [PATCH 1/3] Add .mlmodel format to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index fd22316..2bf08f4 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,5 @@ fastlane/screenshots/**/*.png fastlane/test_output .DS_Store + +*.mlmodel From 5ff132e15ec2c7896c8a2c819e5f37183e6acf5a Mon Sep 17 00:00:00 2001 From: doyoung-gwak Date: Sat, 13 Mar 2021 01:51:02 +0900 Subject: [PATCH 2/3] Support face-parsing semantic segmentation model #12 and support to render multiple colors #13 --- .../project.pbxproj | 18 ++- .../LiveImageViewController.swift | 5 +- .../LiveMetalCameraViewController.swift | 17 +-- .../MetalCamera/CameraTextureGenerater.swift | 8 +- .../MetalCamera/MaskTextureGenerater.swift | 106 ++++++++++++++++++ ...titargetSegmentationTextureGenerater.swift | 70 ++++++++++++ .../MetalCamera/Shaders/Shaders.metal | 54 ++++++++- 7 files changed, 263 insertions(+), 15 deletions(-) create mode 100644 SemanticSegmentation-CoreML/MetalCamera/MaskTextureGenerater.swift create mode 100644 SemanticSegmentation-CoreML/MetalCamera/MultitargetSegmentationTextureGenerater.swift diff --git a/SemanticSegmentation-CoreML.xcodeproj/project.pbxproj b/SemanticSegmentation-CoreML.xcodeproj/project.pbxproj index 2b896b4..8b03cf2 100644 --- a/SemanticSegmentation-CoreML.xcodeproj/project.pbxproj +++ b/SemanticSegmentation-CoreML.xcodeproj/project.pbxproj @@ -20,6 +20,7 @@ 71BBE06222E3400E00E74F11 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE06022E3400E00E74F11 /* VideoCapture.swift */; }; 71BBE06322E3400E00E74F11 /* Measure.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE06122E3400E00E74F11 /* Measure.swift */; }; 71BBE06722E3446300E74F11 /* SegmentationResultMLMultiArray.swift in Sources */ = {isa = PBXBuildFile; fileRef = 71BBE06622E3446300E74F11 /* SegmentationResultMLMultiArray.swift */; }; + C4052DC025EFE8960040F98D /* MaskTextureGenerater.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4052DBF25EFE8960040F98D /* MaskTextureGenerater.swift */; }; C4BB0D92256195AE00354C08 /* MetalRenderingDevice.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0D91256195AE00354C08 /* MetalRenderingDevice.swift */; }; C4BB0D96256195F800354C08 /* Maths.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0D95256195F800354C08 /* Maths.swift */; }; C4BB0D99256196A300354C08 /* CameraTextureGenerater.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0D98256196A300354C08 /* CameraTextureGenerater.swift */; }; @@ -29,6 +30,8 @@ C4BB0DA625619AA400354C08 /* MetalVideoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0DA525619AA400354C08 /* MetalVideoView.swift */; }; C4BB0DA925619C0400354C08 /* LiveMetalCameraViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0DA825619C0400354C08 /* LiveMetalCameraViewController.swift */; }; C4BB0DB52561A47900354C08 /* Shaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = C4BB0DB42561A47900354C08 /* Shaders.metal */; }; + C4DDEF4925FB779D000CF6A5 /* MultitargetSegmentationTextureGenerater.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4DDEF4825FB779D000CF6A5 /* MultitargetSegmentationTextureGenerater.swift */; }; + C4DDEF7525FBCBC8000CF6A5 /* FaceParsing.mlmodel in Sources */ = {isa = PBXBuildFile; fileRef = C4DDEF7425FBCBC8000CF6A5 /* FaceParsing.mlmodel */; }; /* End PBXBuildFile section */ /* 
Begin PBXFileReference section */ @@ -47,6 +50,7 @@ 71BBE06022E3400E00E74F11 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 71BBE06122E3400E00E74F11 /* Measure.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Measure.swift; sourceTree = ""; }; 71BBE06622E3446300E74F11 /* SegmentationResultMLMultiArray.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SegmentationResultMLMultiArray.swift; sourceTree = ""; }; + C4052DBF25EFE8960040F98D /* MaskTextureGenerater.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MaskTextureGenerater.swift; sourceTree = ""; }; C4BB0D91256195AE00354C08 /* MetalRenderingDevice.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalRenderingDevice.swift; sourceTree = ""; }; C4BB0D95256195F800354C08 /* Maths.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Maths.swift; sourceTree = ""; }; C4BB0D98256196A300354C08 /* CameraTextureGenerater.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CameraTextureGenerater.swift; sourceTree = ""; }; @@ -56,6 +60,8 @@ C4BB0DA525619AA400354C08 /* MetalVideoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalVideoView.swift; sourceTree = ""; }; C4BB0DA825619C0400354C08 /* LiveMetalCameraViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LiveMetalCameraViewController.swift; sourceTree = ""; }; C4BB0DB42561A47900354C08 /* Shaders.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Shaders.metal; sourceTree = ""; }; + C4DDEF4825FB779D000CF6A5 /* MultitargetSegmentationTextureGenerater.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultitargetSegmentationTextureGenerater.swift; sourceTree = ""; }; + C4DDEF7425FBCBC8000CF6A5 /* FaceParsing.mlmodel */ = {isa = PBXFileReference; lastKnownFileType = file.mlmodel; path = FaceParsing.mlmodel; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -112,6 +118,7 @@ 71BBE05522E33BEB00E74F11 /* DeepLabV3.mlmodel */, 71BBE05722E33BEF00E74F11 /* DeepLabV3FP16.mlmodel */, 71BBE05922E33BF300E74F11 /* DeepLabV3Int8LUT.mlmodel */, + C4DDEF7425FBCBC8000CF6A5 /* FaceParsing.mlmodel */, ); path = mlmodel; sourceTree = ""; @@ -123,7 +130,9 @@ C4BB0DA525619AA400354C08 /* MetalVideoView.swift */, C4BB0D98256196A300354C08 /* CameraTextureGenerater.swift */, C4BB0D9F2561983C00354C08 /* SegmentationTextureGenerater.swift */, + C4DDEF4825FB779D000CF6A5 /* MultitargetSegmentationTextureGenerater.swift */, C4BB0DA2256199B200354C08 /* OverlayingTexturesGenerater.swift */, + C4052DBF25EFE8960040F98D /* MaskTextureGenerater.swift */, C4BB0D9B256196ED00354C08 /* Texture.swift */, C4BB0DB32561A46B00354C08 /* Shaders */, C4BB0D94256195E800354C08 /* Utils */, @@ -225,11 +234,14 @@ 71BBE06322E3400E00E74F11 /* Measure.swift in Sources */, C4BB0DA625619AA400354C08 /* MetalVideoView.swift in Sources */, 71BBE05822E33BEF00E74F11 /* DeepLabV3FP16.mlmodel in Sources */, + C4DDEF4925FB779D000CF6A5 /* MultitargetSegmentationTextureGenerater.swift in Sources */, 71BBE06722E3446300E74F11 /* SegmentationResultMLMultiArray.swift in Sources */, C4BB0DA925619C0400354C08 /* LiveMetalCameraViewController.swift in Sources 
*/, C4BB0D9C256196ED00354C08 /* Texture.swift in Sources */, 71BBE05622E33BEB00E74F11 /* DeepLabV3.mlmodel in Sources */, + C4052DC025EFE8960040F98D /* MaskTextureGenerater.swift in Sources */, 71BBE05C22E33C6C00E74F11 /* StillImageViewController.swift in Sources */, + C4DDEF7525FBCBC8000CF6A5 /* FaceParsing.mlmodel in Sources */, C4BB0D96256195F800354C08 /* Maths.swift in Sources */, C4BB0D92256195AE00354C08 /* MetalRenderingDevice.swift in Sources */, 71BBE04622E33B2500E74F11 /* LiveImageViewController.swift in Sources */, @@ -384,14 +396,16 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 5WXJ4Z4H69; + GCC_OPTIMIZATION_LEVEL = s; INFOPLIST_FILE = "SemanticSegmentation-CoreML/Info.plist"; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.1; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", ); PRODUCT_BUNDLE_IDENTIFIER = "com.tucan9389.SemanticSegmentation-CoreML"; PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_OPTIMIZATION_LEVEL = "-O"; SWIFT_VERSION = 5.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -404,7 +418,7 @@ CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = 5WXJ4Z4H69; INFOPLIST_FILE = "SemanticSegmentation-CoreML/Info.plist"; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.1; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", diff --git a/SemanticSegmentation-CoreML/LiveImageViewController.swift b/SemanticSegmentation-CoreML/LiveImageViewController.swift index 5ce26b2..0d2c94a 100644 --- a/SemanticSegmentation-CoreML/LiveImageViewController.swift +++ b/SemanticSegmentation-CoreML/LiveImageViewController.swift @@ -24,7 +24,10 @@ class LiveImageViewController: UIViewController { // MARK - Core ML model // DeepLabV3(iOS12+), DeepLabV3FP16(iOS12+), DeepLabV3Int8LUT(iOS12+) - let segmentationModel = DeepLabV3Int8LUT() + // FaceParsing(iOS14+) + lazy var segmentationModel = { + return try! DeepLabV3() + }() // 11 Pro // DeepLabV3 : 37 465 1 diff --git a/SemanticSegmentation-CoreML/LiveMetalCameraViewController.swift b/SemanticSegmentation-CoreML/LiveMetalCameraViewController.swift index 30dd581..323b517 100644 --- a/SemanticSegmentation-CoreML/LiveMetalCameraViewController.swift +++ b/SemanticSegmentation-CoreML/LiveMetalCameraViewController.swift @@ -20,7 +20,7 @@ class LiveMetalCameraViewController: UIViewController { @IBOutlet weak var fpsLabel: UILabel! var cameraTextureGenerater = CameraTextureGenerater() - var segmentationTextureGenerater = SegmentationTextureGenerater() + var multitargetSegmentationTextureGenerater = MultitargetSegmentationTextureGenerater() var overlayingTexturesGenerater = OverlayingTexturesGenerater() var cameraTexture: Texture? @@ -30,8 +30,14 @@ class LiveMetalCameraViewController: UIViewController { var videoCapture: VideoCapture! 
// MARK - Core ML model - // DeepLabV3(iOS12+), DeepLabV3FP16(iOS12+), DeepLabV3Int8LUT(iOS12+) - let segmentationModel = DeepLabV3() + /// DeepLabV3(iOS12+), DeepLabV3FP16(iOS12+), DeepLabV3Int8LUT(iOS12+) + /// - labels: ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tv"] + /// - number of labels: 21 + /// FaceParsing(iOS14+) + /// - labels: ["background", "skin", "l_brow", "r_brow", "l_eye", "r_eye", "eye_g", "l_ear", "r_ear", "ear_r", "nose", "mouth", "u_lip", "l_lip", "neck", "neck_l", "cloth", "hair", "hat"] + /// - number of labels: 19 + lazy var segmentationModel = { return try! DeepLabV3() }() + let numberOfLabels = 21 // <#if you changed the segmentationModel, you have to change the numberOfLabels#> // MARK: - Vision Properties var request: VNCoreMLRequest? @@ -138,16 +144,13 @@ extension LiveMetalCameraViewController { if let observations = request.results as? [VNCoreMLFeatureValueObservation], let segmentationmap = observations.first?.featureValue.multiArrayValue { - guard let row = segmentationmap.shape[0] as? Int, let col = segmentationmap.shape[1] as? Int else { return } - let targetClass = 15 // index of human category - guard let cameraTexture = cameraTexture, - let segmentationTexture = segmentationTextureGenerater.texture(segmentationmap, row, col, targetClass) else { + let segmentationTexture = multitargetSegmentationTextureGenerater.texture(segmentationmap, row, col, numberOfLabels) else { return } diff --git a/SemanticSegmentation-CoreML/MetalCamera/CameraTextureGenerater.swift b/SemanticSegmentation-CoreML/MetalCamera/CameraTextureGenerater.swift index 20ca623..71287d9 100644 --- a/SemanticSegmentation-CoreML/MetalCamera/CameraTextureGenerater.swift +++ b/SemanticSegmentation-CoreML/MetalCamera/CameraTextureGenerater.swift @@ -20,8 +20,7 @@ class CameraTextureGenerater: NSObject { CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, sharedMetalRenderingDevice.device, nil, &videoTextureCache) } - func texture(from sampleBuffer: CMSampleBuffer) -> Texture? { - guard let cameraFrame = CMSampleBufferGetImageBuffer(sampleBuffer) else { return nil } + func texture(from cameraFrame: CVPixelBuffer) -> Texture? { guard let videoTextureCache = videoTextureCache else { return nil } let bufferWidth = CVPixelBufferGetWidth(cameraFrame) @@ -44,4 +43,9 @@ class CameraTextureGenerater: NSObject { return nil } } + + func texture(from sampleBuffer: CMSampleBuffer) -> Texture? { + guard let cameraFrame = CMSampleBufferGetImageBuffer(sampleBuffer) else { return nil } + return texture(from: cameraFrame) + } } diff --git a/SemanticSegmentation-CoreML/MetalCamera/MaskTextureGenerater.swift b/SemanticSegmentation-CoreML/MetalCamera/MaskTextureGenerater.swift new file mode 100644 index 0000000..49bedf8 --- /dev/null +++ b/SemanticSegmentation-CoreML/MetalCamera/MaskTextureGenerater.swift @@ -0,0 +1,106 @@ +// +// MaskTextureGenerater.swift +// SemanticSegmentation-CoreML +// +// Created by Eric on 2020/06/06. +// Updated by Doyoung Gwak on 2021/03/04. +// Copyright © 2020 Eric. All rights reserved. +// + +import MetalKit + +class MaskTextureGenerater: NSObject { + + private var pipelineState: MTLRenderPipelineState! + + private var textureBuffer1: MTLBuffer? + private var textureBuffer2: MTLBuffer? 
+ + public var alphaValue: Float = 0.5 + + public override init() { + super.init() + setup() + } + + private func setup() { + setupPiplineState() + } + + private func setupPiplineState(_ colorPixelFormat: MTLPixelFormat = .bgra8Unorm) { + do { + let rpd = try sharedMetalRenderingDevice.generateRenderPipelineDescriptor("two_vertex_render_target", + "maskFragment", + colorPixelFormat) + pipelineState = try sharedMetalRenderingDevice.device.makeRenderPipelineState(descriptor: rpd) + } catch { + debugPrint(error) + } + } + + private func generateTextureBuffer(_ width: Int, _ height: Int, _ targetWidth: Int, _ targetHeight: Int) -> MTLBuffer? { + let targetRatio = Float(targetWidth)/Float(targetHeight) + let curRatio = Float(width)/Float(height) + + let coordinates: [Float] + + if targetRatio > curRatio { + let remainHeight = (Float(height) - Float(width) * targetRatio)/2.0 + let remainRatio = remainHeight/Float(height) + coordinates = [0.0, remainRatio, 1.0, remainRatio, 0.0, 1.0 - remainRatio, 1.0, 1.0 - remainRatio] + } else { + let remainWidth = (Float(width) - Float(height) * targetRatio)/2.0 + let remainRatio = remainWidth/Float(width) + coordinates = [remainRatio, 0.0, 1.0 - remainRatio, 0.0, remainRatio, 1.0, 1.0 - remainRatio, 1.0] + } + + let textureBuffer = sharedMetalRenderingDevice.device.makeBuffer(bytes: coordinates, + length: coordinates.count * MemoryLayout.size, + options: [])! + return textureBuffer + } + + func texture(_ source1: Texture, _ source2: Texture) -> Texture? { + let minX = min(source1.texture.width, source2.texture.width) + let minY = min(source1.texture.height, source2.texture.height) + + if textureBuffer1 == nil { + textureBuffer1 = generateTextureBuffer(source1.texture.width, source1.texture.height, minX, minY) + } + if textureBuffer2 == nil { + textureBuffer2 = generateTextureBuffer(source2.texture.width, source2.texture.height, minX, minY) + } + + let outputTexture = Texture(minX, minY, textureKey: source1.textureKey) + + let renderPassDescriptor = MTLRenderPassDescriptor() + let attachment = renderPassDescriptor.colorAttachments[0] + attachment?.clearColor = MTLClearColorMake(1, 0, 0, 1) + attachment?.texture = outputTexture.texture + attachment?.loadAction = .clear + attachment?.storeAction = .store + + let commandBuffer = sharedMetalRenderingDevice.commandQueue.makeCommandBuffer() + let commandEncoder = commandBuffer?.makeRenderCommandEncoder(descriptor: renderPassDescriptor) + + commandEncoder?.setFrontFacing(.counterClockwise) + commandEncoder?.setRenderPipelineState(pipelineState) + + let vertexBuffer = sharedMetalRenderingDevice.device.makeBuffer(bytes: standardImageVertices, + length: standardImageVertices.count * MemoryLayout.size, + options: [])! 
+ vertexBuffer.label = "Vertices" + commandEncoder?.setVertexBuffer(vertexBuffer, offset: 0, index: 0) + commandEncoder?.setVertexBuffer(textureBuffer1, offset: 0, index: 1) + commandEncoder?.setVertexBuffer(textureBuffer2, offset: 0, index: 2) + + commandEncoder?.setFragmentTexture(source1.texture, index: 0) + commandEncoder?.setFragmentTexture(source2.texture, index: 1) + + commandEncoder?.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4) + commandEncoder?.endEncoding() + commandBuffer?.commit() + + return outputTexture + } +} diff --git a/SemanticSegmentation-CoreML/MetalCamera/MultitargetSegmentationTextureGenerater.swift b/SemanticSegmentation-CoreML/MetalCamera/MultitargetSegmentationTextureGenerater.swift new file mode 100644 index 0000000..3cc4f22 --- /dev/null +++ b/SemanticSegmentation-CoreML/MetalCamera/MultitargetSegmentationTextureGenerater.swift @@ -0,0 +1,70 @@ +// +// MultitargetSegmentationTextureGenerater.swift +// SemanticSegmentation-CoreML +// +// Created by Doyoung Gwak on 2021/03/12. +// Copyright © 2021 Doyoung Gwak. All rights reserved. +// + +import MetalKit +import CoreML + +class MultitargetSegmentationTextureGenerater: NSObject { + + private var pipelineState: MTLRenderPipelineState! + private var render_target_vertex: MTLBuffer! + private var render_target_uniform: MTLBuffer! + + private func setupPiplineState(_ colorPixelFormat: MTLPixelFormat = .bgra8Unorm, width: Int, height: Int) { + do { + let rpd = try sharedMetalRenderingDevice.generateRenderPipelineDescriptor("vertex_render_target", + "multitarget_segmentation_render_target", + colorPixelFormat) + pipelineState = try sharedMetalRenderingDevice.device.makeRenderPipelineState(descriptor: rpd) + + render_target_vertex = sharedMetalRenderingDevice.makeRenderVertexBuffer(size: CGSize(width: width, height: height)) + render_target_uniform = sharedMetalRenderingDevice.makeRenderUniformBuffer(CGSize(width: width, height: height)) + } catch { + debugPrint(error) + } + } + + func texture(_ segmentationMap: MLMultiArray, _ row: Int, _ col: Int, _ numberOfClasses: Int) -> Texture? { + if pipelineState == nil { + setupPiplineState(width: col, height: row) + } + + let outputTexture = Texture(col, row, textureKey: "multitargetsegmentation") + + let renderPassDescriptor = MTLRenderPassDescriptor() + let attachment = renderPassDescriptor.colorAttachments[0] + attachment?.clearColor = .red + attachment?.texture = outputTexture.texture + attachment?.loadAction = .clear + attachment?.storeAction = .store + + let commandBuffer = sharedMetalRenderingDevice.commandQueue.makeCommandBuffer() + let commandEncoder = commandBuffer?.makeRenderCommandEncoder(descriptor: renderPassDescriptor) + + commandEncoder?.setRenderPipelineState(pipelineState) + + commandEncoder?.setVertexBuffer(render_target_vertex, offset: 0, index: 0) + commandEncoder?.setVertexBuffer(render_target_uniform, offset: 0, index: 1) + + let segmentationBuffer = sharedMetalRenderingDevice.device.makeBuffer(bytes: segmentationMap.dataPointer, + length: segmentationMap.count * MemoryLayout.size, + options: [])! + commandEncoder?.setFragmentBuffer(segmentationBuffer, offset: 0, index: 0) + + let uniformBuffer = sharedMetalRenderingDevice.device.makeBuffer(bytes: [Int32(numberOfClasses), Int32(col), Int32(row)] as [Int32], + length: 3 * MemoryLayout.size, + options: [])! 
+ commandEncoder?.setFragmentBuffer(uniformBuffer, offset: 0, index: 1) + + commandEncoder?.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4) + commandEncoder?.endEncoding() + commandBuffer?.commit() + + return outputTexture + } +} diff --git a/SemanticSegmentation-CoreML/MetalCamera/Shaders/Shaders.metal b/SemanticSegmentation-CoreML/MetalCamera/Shaders/Shaders.metal index 5ee04cf..304aaa0 100644 --- a/SemanticSegmentation-CoreML/MetalCamera/Shaders/Shaders.metal +++ b/SemanticSegmentation-CoreML/MetalCamera/Shaders/Shaders.metal @@ -104,14 +104,62 @@ typedef struct fragment float4 segmentation_render_target(Vertex vertex_data [[ stage_in ]], constant SegmentationValue *segmentation [[ buffer(0) ]], constant SegmentationUniform& uniform [[ buffer(1) ]]) - { int index = int(vertex_data.position.x) + int(vertex_data.position.y) * uniform.width; if(segmentation[index].classNum == uniform.targetClass) { - return float4(1.0, 0, 0, 1.0); + return float4(1.0, 0, 0, 1.0); // red // (r, g, b, a) } - return float4(0,0,0,1.0); + return float4(0,0,0,1.0); // black +}; + +typedef struct +{ + int32_t numberOfClasses; + int32_t width; + int32_t height; +} MultitargetSegmentationUniform; + +fragment float4 multitarget_segmentation_render_target(Vertex vertex_data [[ stage_in ]], + constant SegmentationValue *segmentation [[ buffer(0) ]], + constant MultitargetSegmentationUniform& uniform [[ buffer(1) ]]) +{ + int index = int(vertex_data.position.x) + int(vertex_data.position.y) * uniform.width; + + if (segmentation[index].classNum == 0) { // background case + return float4(0,0,0,1.0); // black + } + + float h_ratio = float(segmentation[index].classNum) / float(uniform.numberOfClasses); + h_ratio = (1.0 - h_ratio) + 0.12/*extra value*/; + h_ratio = h_ratio > 1.0 ? h_ratio - 1.0 : h_ratio; + float h = 360 * h_ratio; + + float angle = h; //(h >= 360.0 ? 0.0 : h); + float sector = angle / 60.0; // Sector + float i = floor(sector); + int i_int = int(sector); + float f = sector - i; // Factorial part of h + + float p = 0.0; + float q = 1.0 - f; + float t = f; + + if (i_int == 0) { + return float4(1.0, t, p, 1.0); + } else if (i_int == 1) { + return float4(q, 1.0, p, 1.0); + } else if (i_int == 2) { + return float4(p, 1.0, t, 1.0); + } else if (i_int == 3) { + return float4(p, q, 1.0, 1.0); + } else if (i_int == 4) { + return float4(t, p, 1.0, 1.0); + } else { + return float4(1.0, p, q, 1.0); + } + + return float4(0,0,0,1.0); // black }; fragment half4 lookupFragment(TwoInputVertex fragmentInput [[stage_in]], From 483f93fad1f1db6f3b107846ed788ef4c6d5a5a4 Mon Sep 17 00:00:00 2001 From: doyoung-gwak Date: Wed, 17 Mar 2021 22:03:47 +0900 Subject: [PATCH 3/3] Update README for FaceParsing --- README.md | 52 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index eda33da..f63d51b 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ This project is Object Segmentation on iOS with Core ML.
If you are interested in iOS + Machine Learning, visit [here](https://github.com/motlabs/iOS-Proejcts-with-ML-Models), where you can see various DEMOs.
-| DEMO | Screenshot 1 | Screenshot 2 | Screenshot 3 | -| ------------------------------------------------------------ | --------------------------------------------- | --------------------------------------------- | --------------------------------------------- | -| | | | | +| DeepLabV3-DEMO1 | FaseParsing-DEMO | DeepLabV3-DEMO-2 | DeepLabV3-DEMO-3 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------- | --------------------------------------------- | +| | | | | ## How it works @@ -22,7 +22,7 @@ This project is Object Segmentation on iOS with Core ML.
If you are intereste - iOS 12.0+ - Swift 5 -## Model +## Models ### Download @@ -30,13 +30,14 @@ Download model from [apple's model page](https://developer.apple.com/machine-lea ### Matadata -| | input node | output node | size | -| :--------------: | :---------------------------------: | :-----------------------------------------: | :----: | -| DeepLabV3 | `[1, 513, 513, 3]`
name: `image` | `[513, 513]`
name: `semanticPredictions` | 8.6 MB | -| DeepLabV3FP16 | `[1, 513, 513, 3]`
name: `image` | `[513, 513]`
name: `semanticPredictions` | 4.3 MB | -| DeepLabV3Int8LUT | `[1, 513, 513, 3]`
name: `image` | `[513, 513]`
name: `semanticPredictions` | 2.3 MB | +| Name | Input | Output | Size | iOS version+ | Download | +| :--------------- | :-----------------------: | :----------------------------: | :-----: | :----------: | :----------------------------------------------------------: | +| DeepLabV3 | `Image (Color 513 × 513)` | `MultiArray (Int32 513 × 513)` | 8.6 MB | iOS 12.0+ | [link](https://developer.apple.com/machine-learning/models/) | +| DeepLabV3FP16 | `Image (Color 513 × 513)` | `MultiArray (Int32 513 × 513)` | 4.3 MB | iOS 12.0+ | [link](https://developer.apple.com/machine-learning/models/) | +| DeepLabV3Int8LUT | `Image (Color 513 × 513)` | `MultiArray (Int32 513 × 513)` | 2.3 MB | iOS 12.0+ | [link](https://developer.apple.com/machine-learning/models/) | +| FaceParsing | `Image (Color 512 × 512)` | `MultiArray (Int32)` 512 × 512 | 52.7 MB | iOS 14.0+ | [link](https://github.com/tucan9389/SemanticSegmentation-CoreML/releases/download/support-face-parsing/FaceParsing.mlmodel) | -### Inference Time +### Inference Time − DeepLabV3 | Device | Inference Time | Total Time (GPU) | Total Time (CPU) | | ----------------- | :------------: | :--------------: | :--------------: | @@ -60,9 +61,38 @@ Download model from [apple's model page](https://developer.apple.com/machine-lea ⏲: need to measure +### Inference Time − FaceParsing + +| Device | Inference Time | Total Time (GPU) | Total Time (CPU) | +| ------------- | :------------: | :--------------: | :--------------: | +| iPhone 12 Pro | ⏲ | ⏲ | ⏲ | +| iPhone 11 Pro | 37 ms | 37 ms | ⏲ | + +### Labels − DeepLabV3 + +``` +# total 21 +["background", "aeroplane", "bicycle", "bird", "boat", +"bottle", "bus", "car", "cat", "chair", +"cow", "diningtable", "dog", "horse", "motorbike", +"person", "pottedplant", "sheep", "sofa", "train", +"tv"] +``` + +### Labels − FaceParsing + +``` +# total 19 +["background", "skin", "l_brow", "r_brow", "l_eye", +"r_eye", "eye_g", "l_ear", "r_ear", "ear_r", +"nose", "mouth", "u_lip", "l_lip", "neck", +"neck_l", "cloth", "hair", "hat"] +``` + ## See also - [motlabs/iOS-Proejcts-with-ML-Models](https://github.com/motlabs/iOS-Proejcts-with-ML-Models)
: The challenge of using machine learning models created with TensorFlow on iOS -- [deeplab on TensorFlow](https://github.com/tensorflow/models/tree/master/research/deeplab)
+- [DeepLab on TensorFlow](https://github.com/tensorflow/models/tree/master/research/deeplab)
: The repository providing DeepLabV3 model +- [FaceParsing](https://github.com/zllrunning/face-parsing.PyTorch)
: The repository providing the FaceParsing PyTorch model
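
The sketch below (illustrative, not part of the patches above) shows how the pieces introduced in this series are typically wired together: the lazy, throwing model initialization used in both view controllers, a `VNCoreMLRequest` whose result is read as a `VNCoreMLFeatureValueObservation`, and the label count that must match the chosen model (21 for the DeepLabV3 variants, 19 for FaceParsing). It assumes `FaceParsing.mlmodel` from the README's download link has been added to the target so Xcode generates a `FaceParsing` class; the `FaceParsingRunner` type and its member names are made up for the example.

```swift
import CoreML
import CoreVideo
import Vision

final class FaceParsingRunner {

    // Mirrors the PR's lazy model pattern, e.g. `lazy var segmentationModel = { return try! DeepLabV3() }()`.
    // FaceParsing (iOS 14.0+) takes a 512 x 512 color image and returns a 512 x 512 Int32 MultiArray.
    lazy var segmentationModel = { return try! FaceParsing() }()

    // If you change the model, change the label count too (same caveat as in the PR):
    // DeepLabV3 family = 21 labels, FaceParsing = 19 labels.
    let numberOfLabels = 19

    lazy var request: VNCoreMLRequest? = {
        guard let visionModel = try? VNCoreMLModel(for: segmentationModel.model) else { return nil }
        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, _ in
            guard let self = self,
                  let observations = request.results as? [VNCoreMLFeatureValueObservation],
                  let segmentationMap = observations.first?.featureValue.multiArrayValue,
                  let rows = segmentationMap.shape[0] as? Int,
                  let cols = segmentationMap.shape[1] as? Int else { return }
            // The PR hands segmentationMap, rows, cols and numberOfLabels to
            // MultitargetSegmentationTextureGenerater; here we just sample one value.
            let centerClass = segmentationMap[[rows / 2, cols / 2] as [NSNumber]].intValue
            print("class index at center pixel: \(centerClass) of \(self.numberOfLabels) labels")
        }
        request.imageCropAndScaleOption = .scaleFill
        return request
    }()

    func predict(on pixelBuffer: CVPixelBuffer) {
        guard let request = request else { return }
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        try? handler.perform([request])
    }
}
```

On the rendering side, the new `multitarget_segmentation_render_target` fragment shader colors every non-background class instead of a single target class: each pixel's class index is mapped to a hue (roughly `h = 360 × (1 − classNum/numberOfClasses + 0.12)`, wrapped into [0, 360)) and then converted from HSV to RGB, which is why each segmented region gets its own color without a per-class color table.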