Skip to content

Commit d2d08a5

Browse files
add allowEmptyMorpheme param with default false
1 parent ca5ea5a commit d2d08a5

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

src/main/java/com/worksap/nlp/elasticsearch/sudachi/ConfigAdapter.kt

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,20 @@ class ConfigAdapter(anchor: PathAnchor, settings: Settings, env: Environment) {
3636
}
3737

3838
val discardPunctuation: Boolean = settings.getAsBoolean(PARAM_DISCARD_PUNCTUATION, true)
39-
39+
// Defaults to false so that every morpheme has a non-null span in the input text.
40+
val allowEmptyMorpheme: Boolean = settings.getAsBoolean(PARAM_ALLOW_EMPTY_MORPHEME, false)
4041
val mode = splitMode(settings)
4142

4243
private fun settingsFile(settings: Settings): Config {
4344
val settingsPath = settings.get(PARAM_SETTINGS_PATH)
44-
return if (settingsPath == null) {
45-
readDefaultConfig(basePath, fullAnchor)
46-
} else {
47-
val configObject = fullAnchor.resource<Any>(settingsPath)
48-
Config.fromResource(configObject, fullAnchor)
49-
}
45+
val base =
46+
if (settingsPath == null) {
47+
readDefaultConfig(basePath, fullAnchor)
48+
} else {
49+
val configObject = fullAnchor.resource<Any>(settingsPath)
50+
Config.fromResource(configObject, fullAnchor)
51+
}
52+
return base.allowEmptyMorpheme(allowEmptyMorpheme)
5053
}
5154

5255
companion object {
@@ -56,6 +59,7 @@ class ConfigAdapter(anchor: PathAnchor, settings: Settings, env: Environment) {
5659
const val PARAM_RESOURCES_PATH = "resources_path"
5760
const val PARAM_ADDITIONAL_SETTINGS = "additional_settings"
5861
const val PARAM_DISCARD_PUNCTUATION = "discard_punctuation"
62+
const val PARAM_ALLOW_EMPTY_MORPHEME = "allow_empty_morpheme"
5963

6064
const val DEFAULT_SETTINGS_FILENAME = "sudachi.json"
6165
const val DEFAULT_RESOURCE_PATH = "sudachi"

0 commit comments

Comments
 (0)