feat(DiscoveryV1): Tokenization dictionaries for collections

Adds the methods createTokenizationDictionary(), getTokenizationDictionaryStatus(), and deleteTokenizationDictionary(). This feature is currently only supported for Japanese-language collections.
watson-developer-cloud · Nov 2, 2018 · d274371 · d274371
1 parent 54d1cca
commit d274371
Show file tree

Hide file tree

Showing 5 changed files with 365 additions and 0 deletions.
diff --git a/Source/DiscoveryV1/Discovery.swift b/Source/DiscoveryV1/Discovery.swift
@@ -1432,6 +1432,183 @@ public class Discovery {
         }
     }
 
+    /**
+     Get tokenization dictionary status.
+
+     Returns the current status of the tokenization dictionary for the specified collection.
+
+     - parameter environmentID: The ID of the environment.
+     - parameter collectionID: The ID of the collection.
+     - parameter headers: Request headers for this call; on key conflicts they replace the default headers.
+     - parameter failure: A function executed if an error occurs, including when the path cannot be encoded.
+     - parameter success: A function executed with the decoded `TokenDictStatusResponse`.
+     */
+    public func getTokenizationDictionaryStatus(
+        environmentID: String,
+        collectionID: String,
+        headers: [String: String]? = nil,
+        failure: ((Error) -> Void)? = nil,
+        success: @escaping (TokenDictStatusResponse) -> Void)
+    {
+        // construct header parameters (caller headers win over defaults; `Accept` is then always set to JSON)
+        var headerParameters = defaultHeaders
+        if let headers = headers {
+            headerParameters.merge(headers) { (_, new) in new }
+        }
+        headerParameters["Accept"] = "application/json"
+
+        // construct query parameters (only the API `version` is sent)
+        var queryParameters = [URLQueryItem]()
+        queryParameters.append(URLQueryItem(name: "version", value: version))
+
+        // construct REST request; report `RestError.encodingError` if the path cannot be percent-encoded
+        let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
+        guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
+            failure?(RestError.encodingError)
+            return
+        }
+        let request = RestRequest(
+            session: session,
+            authMethod: authMethod,
+            errorResponseDecoder: errorResponseDecoder,
+            method: "GET",
+            url: serviceURL + encodedPath,
+            headerParameters: headerParameters,
+            queryItems: queryParameters
+        )
+
+        // execute REST request and forward the result to the matching callback
+        request.responseObject {
+            (response: RestResponse<TokenDictStatusResponse>) in
+            switch response.result {
+            case .success(let retval): success(retval)
+            case .failure(let error): failure?(error)
+            }
+        }
+    }
+
+    /**
+     Create tokenization dictionary.
+
+     Upload a custom tokenization dictionary to use with the specified collection.
+
+     - parameter environmentID: The ID of the environment.
+     - parameter collectionID: The ID of the collection.
+     - parameter tokenizationRules: An array of tokenization rules. Each rule contains the original `text` string,
+       component `tokens`, any alternate character set `readings`, and which `part_of_speech` the text is from.
+     - parameter headers: Request headers for this call; on key conflicts they replace the default headers.
+     - parameter failure: A function executed if an error occurs, including body or path encoding errors.
+     - parameter success: A function executed with the decoded `TokenDictStatusResponse`.
+     */
+    public func createTokenizationDictionary(
+        environmentID: String,
+        collectionID: String,
+        tokenizationRules: [TokenDictRule]? = nil,
+        headers: [String: String]? = nil,
+        failure: ((Error) -> Void)? = nil,
+        success: @escaping (TokenDictStatusResponse) -> Void)
+    {
+        // construct body; report `RestError.serializationError` if no JSON body can be produced
+        let createTokenizationDictionaryRequest = TokenDict(tokenizationRules: tokenizationRules)
+        guard let body = try? JSONEncoder().encodeIfPresent(createTokenizationDictionaryRequest) else {
+            failure?(RestError.serializationError)
+            return
+        }
+
+        // construct header parameters (caller headers win over defaults; `Accept`/`Content-Type` forced to JSON)
+        var headerParameters = defaultHeaders
+        if let headers = headers {
+            headerParameters.merge(headers) { (_, new) in new }
+        }
+        headerParameters["Accept"] = "application/json"
+        headerParameters["Content-Type"] = "application/json"
+
+        // construct query parameters (only the API `version` is sent)
+        var queryParameters = [URLQueryItem]()
+        queryParameters.append(URLQueryItem(name: "version", value: version))
+
+        // construct REST request; report `RestError.encodingError` if the path cannot be percent-encoded
+        let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
+        guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
+            failure?(RestError.encodingError)
+            return
+        }
+        let request = RestRequest(
+            session: session,
+            authMethod: authMethod,
+            errorResponseDecoder: errorResponseDecoder,
+            method: "POST",
+            url: serviceURL + encodedPath,
+            headerParameters: headerParameters,
+            queryItems: queryParameters,
+            messageBody: body
+        )
+
+        // execute REST request and forward the result to the matching callback
+        request.responseObject {
+            (response: RestResponse<TokenDictStatusResponse>) in
+            switch response.result {
+            case .success(let retval): success(retval)
+            case .failure(let error): failure?(error)
+            }
+        }
+    }
+
+    /**
+     Delete tokenization dictionary.
+
+     Delete the tokenization dictionary from the collection.
+
+     - parameter environmentID: The ID of the environment.
+     - parameter collectionID: The ID of the collection.
+     - parameter headers: Request headers for this call; on key conflicts they replace the default headers.
+     - parameter failure: A function executed if an error occurs, including when the path cannot be encoded.
+     - parameter success: A function executed when the request succeeds; it receives no value.
+     */
+    public func deleteTokenizationDictionary(
+        environmentID: String,
+        collectionID: String,
+        headers: [String: String]? = nil,
+        failure: ((Error) -> Void)? = nil,
+        success: @escaping () -> Void)
+    {
+        // construct header parameters (caller headers win over defaults; `Accept` is then always set to JSON)
+        var headerParameters = defaultHeaders
+        if let headers = headers {
+            headerParameters.merge(headers) { (_, new) in new }
+        }
+        headerParameters["Accept"] = "application/json"
+
+        // construct query parameters (only the API `version` is sent)
+        var queryParameters = [URLQueryItem]()
+        queryParameters.append(URLQueryItem(name: "version", value: version))
+
+        // construct REST request; report `RestError.encodingError` if the path cannot be percent-encoded
+        let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
+        guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
+            failure?(RestError.encodingError)
+            return
+        }
+        let request = RestRequest(
+            session: session,
+            authMethod: authMethod,
+            errorResponseDecoder: errorResponseDecoder,
+            method: "DELETE",
+            url: serviceURL + encodedPath,
+            headerParameters: headerParameters,
+            queryItems: queryParameters
+        )
+
+        // execute REST request; no response body is decoded, only success or failure is reported
+        request.responseVoid {
+            (response: RestResponse) in
+            switch response.result {
+            case .success: success()
+            case .failure(let error): failure?(error)
+            }
+        }
+    }
+
     /**
      Add a document.
 

diff --git a/Source/DiscoveryV1/Models/TokenDict.swift b/Source/DiscoveryV1/Models/TokenDict.swift
@@ -0,0 +1,50 @@
+/**
+ * Copyright IBM Corporation 2018
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+import Foundation
+
+/**
+ Tokenization dictionary describing how words are tokenized during ingestion and at query time.
+ */
+internal struct TokenDict: Encodable {
+
+    /**
+     An array of tokenization rules. Each rule contains the original `text` string, component `tokens`, any alternate
+     character set `readings`, and which `part_of_speech` the text is from.
+     */
+    public var tokenizationRules: [TokenDictRule]?
+
+    // Map each property name to the JSON key used when encoding (this type is `Encodable` only).
+    private enum CodingKeys: String, CodingKey {
+        case tokenizationRules = "tokenization_rules"
+    }
+
+    /**
+     Initialize a `TokenDict` with member variables.
+
+     - parameter tokenizationRules: An array of tokenization rules. Each rule contains the original `text` string,
+       component `tokens`, any alternate character set `readings`, and which `part_of_speech` the text is from.
+
+     - returns: An initialized `TokenDict`.
+    */
+    public init(
+        tokenizationRules: [TokenDictRule]? = nil
+    )
+    {
+        self.tokenizationRules = tokenizationRules
+    }
+
+}
diff --git a/Source/DiscoveryV1/Models/TokenDictRule.swift b/Source/DiscoveryV1/Models/TokenDictRule.swift
@@ -0,0 +1,77 @@
+/**
+ * Copyright IBM Corporation 2018
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+import Foundation
+
+/**
+ An object defining a single tokenization rule.
+ */
+public struct TokenDictRule: Encodable {
+
+    /**
+     The string to tokenize.
+     */
+    public var text: String?
+
+    /**
+     Array of tokens that the `text` field is split into when found.
+     */
+    public var tokens: [String]?
+
+    /**
+     Array of tokens that represent the content of the `text` field in an alternate character set.
+     */
+    public var readings: [String]?
+
+    /**
+     The part of speech that the `text` string belongs to. For example `noun`. Custom parts of speech can be specified.
+     */
+    public var partOfSpeech: String?
+
+    // Map each property name to the JSON key used when encoding (this type is `Encodable` only).
+    private enum CodingKeys: String, CodingKey {
+        case text = "text"
+        case tokens = "tokens"
+        case readings = "readings"
+        case partOfSpeech = "part_of_speech"
+    }
+
+    /**
+     Initialize a `TokenDictRule` with member variables. Every parameter is optional and defaults to `nil`.
+
+     - parameter text: The string to tokenize.
+     - parameter tokens: Array of tokens that the `text` field is split into when found.
+     - parameter readings: Array of tokens that represent the content of the `text` field in an alternate character
+       set.
+     - parameter partOfSpeech: The part of speech that the `text` string belongs to. For example `noun`. Custom parts
+       of speech can be specified.
+
+     - returns: An initialized `TokenDictRule`.
+    */
+    public init(
+        text: String? = nil,
+        tokens: [String]? = nil,
+        readings: [String]? = nil,
+        partOfSpeech: String? = nil
+    )
+    {
+        self.text = text
+        self.tokens = tokens
+        self.readings = readings
+        self.partOfSpeech = partOfSpeech
+    }
+
+}
diff --git a/Source/DiscoveryV1/Models/TokenDictStatusResponse.swift b/Source/DiscoveryV1/Models/TokenDictStatusResponse.swift
@@ -0,0 +1,49 @@
+/**
+ * Copyright IBM Corporation 2018
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+import Foundation
+
+/**
+ Object describing the current status of the tokenization dictionary.
+ */
+public struct TokenDictStatusResponse: Decodable {
+
+    /**
+     Named constants for tokenization dictionary status strings: `active`, `pending`, and `not found`.
+     */
+    public enum Status: String {
+        case active = "active"
+        case pending = "pending"
+        case notFound = "not found"
+    }
+
+    /**
+     Current tokenization dictionary status for the specified collection, as a raw string (not a `Status` value).
+     */
+    public var status: String?
+
+    /**
+     The type for this dictionary. Always returns `tokenization_dictionary`.
+     */
+    public var type: String?
+
+    // Map each property name to the JSON key used when decoding (this type is `Decodable` only).
+    private enum CodingKeys: String, CodingKey {
+        case status = "status"
+        case type = "type"
+    }
+
+}