Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
976 lines (924 sloc) 27 KB
;;; stem.el --- Routines for stemming
;;; $Id$
;;; Author: Tsuchiya Masatoshi <>
;; URL:
;; Keywords: stemming
;;; License:
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <>.
;;; Commentary:
;; This library is basing on thesis 'An algorithm for suffix stripping
;; (M.F.Porter)' that trim English word's suffix.
;; 論文『An algorithm for suffix stripping (M.F.Porter)』に記述されて
;; いるアルゴリズムに基づいて、英単語の語尾を取り除くためのライブラリ。
;; 利用及び再配布の際は、GNU 一般公用許諾書の適当なバージョンにしたがっ
;; て下さい。
;; 一次配布元
;;; History:
;; 2012/8/26 Register stem.el to GitHub to utilize from Emacs another packages.
;; And add GNU Public license for English.
;; (by Yuta Yamada cokesboy"at"
;;; Code:
;; -*- Emacs-Lisp -*-
(defvar stem:minimum-word-length 4
"Minimum word length that can apply Porter's algorithm.")
;;; Private functions / 非公開関数
;; To up execution speed, there are changing external variable in
;; function, possibility of unexpected side effects may occur with
;; high. Therefore, do not call private function.
;; 動作速度を向上させるために、関数内部で外部変数をいじっている
;; 関数があり、予期しない副作用が発生する可能性が高い。従って、
;; 非公開関数を直接呼び出すことは避けること。
;; stemming-rule の条件節を記述する関数群
(defsubst stem:match (arg) "\
変数 str を検査する非公開関数 (語幹の部分を変数 stem に代入する)"
(string-match arg str)
(setq stem (substring str 0 (match-beginning 0)))))
(defsubst stem:m () "\
変数 stem に含まれている VC の数を求める非公開関数"
(let ((pos 0)(m 0))
(while (string-match "\\(a\\|e\\|i\\|o\\|u\\|[^aeiou]y+\\)[aeiou]*" stem pos)
(setq m (1+ m))
(setq pos (match-end 0)))
(if (= pos (length stem)) (1- m) m))))
(defsubst stem:m> (i) "\
変数 stem に含まれている VC の数の条件を記述する非公開関数"
(< i (stem:m)))
(defsubst stem:m= (i) "\
変数 stem に含まれている VC の数の条件を記述する非公開関数"
(= i (stem:m)))
(defsubst stem:*v* () "\
変数 stem が母音を含んでいるか検査する関数"
(if (string-match "\\(a\\|e\\|i\\|o\\|u\\|[^aeiou]y\\)" stem) t)))
(defsubst stem:*o () "\
変数 stem が cvc の形で終っているか検査する関数"
(if (string-match "[^aeiou][aeiouy][^aeiouwxy]$" stem) t))))
;; stemming-rule を記述した関数群
(defun stem:step1a (str) "第1a段階の stemming rule (非公開関数)"
(let ((s)(stem))
(if (setq s (cond
((stem:match "sses$") "ss")
((stem:match "ies$") "i")
((stem:match "ss$") "ss")
((stem:match "s$") "")))
(concat stem s)
(defun stem:step1b (str) "第1b段階の stemming rule (非公開関数)"
(let ((s)(stem))
((and (stem:match "eed$") (stem:m> 0))
(concat stem "ee"))
((or (and (not stem) (stem:match "ed$") (stem:*v*))
(and (stem:match "ing$") (stem:*v*)))
(if (and (stem:m= 1) (stem:*o))
(concat stem "e")
(setq str stem)
(if (setq s (cond
((stem:match "at$") "ate")
((stem:match "bl$") "ble")
((stem:match "iz$") "ize")
((stem:match "\\([^lsz]\\)\\1$")
(substring str (match-beginning 1) (match-end 1)))))
(concat stem s)
(t str))))
(defun stem:step1c (str) "第1c段階の stemming rule (非公開関数)"
(let ((stem))
(if (and (stem:match "y$")
(concat stem "i")
(defun stem:step1 (str) "第1段階の stemming rule (非公開関数)"
(stem:step1a str))))
(defun stem:step2 (str) "第2段階の stemming rule (非公開関数)"
(let ((s)(stem))
(if (and
(setq s (cond
((stem:match "ational$") "ate")
((stem:match "tional$") "tion")
((stem:match "enci$") "ence")
((stem:match "anci$") "ance")
((stem:match "izer$") "ize")
((stem:match "abli$") "able")
((stem:match "alli$") "al")
((stem:match "entli$") "ent")
((stem:match "eli$") "e")
((stem:match "ousli$") "ous")
((stem:match "ization$") "ize")
((stem:match "ation$") "ate")
((stem:match "ator$") "ate")
((stem:match "alism$") "al")
((stem:match "iveness$") "ive")
((stem:match "fulness$") "ful")
((stem:match "ousness$") "ous")
((stem:match "aliti$") "al")
((stem:match "iviti$") "ive")
((stem:match "biliti$") "ble")))
(stem:m> 0))
(concat stem s)
(defun stem:step3 (str) "第3段階の stemming rule (非公開関数)"
(let ((s)(stem))
(if (and
(setq s (cond
((stem:match "icate$") "ic")
((stem:match "ative$") "")
((stem:match "alize$") "al")
((stem:match "iciti$") "ic")
((stem:match "ical$") "ic")
((stem:match "ful$") "")
((stem:match "ness$") "")))
(stem:m> 0))
(concat stem s)
(defun stem:step4 (str) "第4段階の stemming rule (非公開関数)"
(let ((stem))
(if (and (or
(stem:match "al$")
(stem:match "ance$")
(stem:match "ence$")
(stem:match "er$")
(stem:match "ic$")
(stem:match "able$")
(stem:match "ible$")
(stem:match "ant$")
(stem:match "ement$")
(stem:match "ment$")
(stem:match "ent$")
(and (string-match "[st]\\(ion\\)$" str)
(setq stem (substring str 0 (match-beginning 1))))
(stem:match "ou$")
(stem:match "ism$")
(stem:match "ate$")
(stem:match "iti$")
(stem:match "ous$")
(stem:match "ive$")
(stem:match "ize$"))
(stem:m> 1))
stem str)))
(defun stem:step5 (str) "第5段階の stemming rule (非公開関数)"
(let ((stem))
(if (or
(and (stem:match "e$")
(or (stem:m> 1)
(and (stem:m= 1)
(not (stem:*o)))))
(and (stem:match "ll$")
(setq stem (concat stem "l"))
(stem:m> 1)))
stem str)))
(defvar stem:irregular-verb-alist
'(("abode" "abide")
("abided" "abide")
("alighted" "alight")
("arose" "arise")
("arisen" "arise")
("awoke" "awake")
("awaked" "awake")
("awoken" "awake")
("baby-sat" "baby-sit")
("backbit" "backbite")
("backbitten" "backbite")
("backslid" "backslide")
("backslidden" "backslide")
("was" "be" "am" "is" "are")
("were" "be" "am" "is" "are")
("been" "be" "am" "is" "are")
("bore" "bear")
("bare" "bear")
("borne" "bear")
("born" "bear")
("beat" "beat")
("beaten" "beat")
("befell" "befall")
("befallen" "befall")
("begot" "beget")
("begat" "beget")
("begotten" "beget")
("began" "begin")
("begun" "begin")
("begirt" "begird")
("begirded" "begird")
("beheld" "behold")
("bent" "bend")
("bended" "bend")
("bereaved" "bereave")
("bereft" "bereave")
("besought" "beseech")
("beseeched" "beseech")
("beset" "beset")
("bespoke" "bespeak")
("bespoken" "bespeak")
("bestrewed" "bestrew")
("bestrewn" "bestrew")
("bestrode" "bestride")
("bestrid" "bestride")
("bestridden" "bestride")
("bet" "bet")
("betted" "bet")
("betook" "betake")
("betaken" "betake")
("bethought" "bethink")
("bade" "bid")
("bid" "bid")
("bad" "bid")
("bedden" "bid")
("bided" "bide")
("bode" "bide")
("bound" "bind")
("bit" "bite")
("bitten" "bite")
("bled" "bleed")
("blended" "blend")
("blent" "blend")
("blessed" "bless")
("blest" "bless")
("blew" "blow")
("blown" "blow")
("blowed" "blow")
("bottle-fed" "bottle-feed")
("broke" "break")
("broken" "break")
("breast-fed" "breast-feed")
("bred" "breed")
("brought" "bring")
("broadcast" "broadcast")
("broadcasted" "broadcast")
("browbeat" "browbeat")
("browbeaten" "browbeat")
("built" "build")
("builded" "build")
("burned" "burn")
("burnt" "burn")
("burst" "burst")
("busted" "bust")
("bust" "bust")
("bought" "buy")
("cast" "cast")
("chid" "chide")
("chided" "chide")
("chidden" "chide")
("chose" "choose")
("chosen" "choose")
("clove" "cleave")
("cleft" "cleave")
("cleaved" "cleave")
("cloven" "cleave")
("clave" "cleave")
("clung" "cling")
("clothed" "clothe")
("clad" "clothe")
("colorcast" "colorcast")
("clorcasted" "colorcast")
("came" "come")
("come" "come")
("cost" "cost")
("costed" "cost")
("countersank" "countersink")
("countersunk" "countersink")
("crept" "creep")
("crossbred" "crossbreed")
("crowed" "crow")
("crew" "crow")
("cursed" "curse")
("curst" "curse")
("cut" "cut")
("dared" "dare")
("durst" "dare")
("dealt" "deal")
("deep-froze" "deep-freeze")
("deep-freezed" "deep-freeze")
("deep-frozen" "deep-freeze")
("dug" "dig")
("digged" "dig")
("dived" "dive")
("dove" "dive")
("did" "do")
("done" "do")
("drew" "draw")
("drawn" "draw")
("dreamed" "dream")
("dreamt" "dream")
("drank" "drink")
("drunk" "drink")
("dripped" "drip")
("dript" "drip")
("drove" "drive")
("drave" "drive")
("driven" "drive")
("dropped" "drop")
("dropt" "drop")
("dwelt" "dwell")
("dwelled" "dwell")
("ate" "eat")
("eaten" "eat")
("fell" "fall")
("fallen" "fall")
("fed" "feed")
("felt" "feel")
("fought" "fight")
("found" "find")
("fled" "fly" "flee")
("flung" "fling")
("flew" "fly")
("flied" "fly")
("flown" "fly")
("forbore" "forbear")
("forborne" "forbear")
("forbade" "forbid")
("forbad" "forbid")
("forbidden" "forbid")
("forecast" "forecast")
("forecasted" "forecast")
("forewent" "forego")
("foregone" "forego")
("foreknew" "foreknow")
("foreknown" "foreknow")
("foreran" "forerun")
("forerun" "forerun")
("foresaw" "foresee")
("foreseen" "foresee")
("foreshowed" "foreshow")
("foreshown" "foreshow")
("foretold" "foretell")
("forgot" "forget")
("forgotten" "forget")
("forgave" "forgive")
("forgiven" "forgive")
("forwent" "forgo")
("forgone" "forgo")
("forsook" "forsake")
("forsaken" "forsake")
("forswore" "forswear")
("forsworn" "forswear")
("froze" "freeze")
("frozen" "freeze")
("gainsaid" "gainsay")
("gelded" "geld")
("gelt" "geld")
("got" "get")
("gotten" "get")
("ghostwrote" "ghostwrite")
("ghostwritten" "ghostwrite")
("gilded" "gild")
("gilt" "gild")
("girded" "gird")
("girt" "gird")
("gave" "give")
("given" "give")
("gnawed" "gnaw")
("gnawn" "gnaw")
("went" "wend" "go")
("gone" "go")
("graved" "grave")
("graven" "grave")
("ground" "grind")
("gripped" "grip")
("gript" "grip")
("grew" "grow")
("grown" "grow")
("hamstrung" "hamstring")
("hamstringed" "hamstring")
("hung" "hang")
("hanged" "hang")
("had" "have")
("heard" "hear")
("heaved" "heave")
("hove" "heave")
("hewed" "hew")
("hewn" "hew")
("hid" "hide")
("hidden" "hide")
("hit" "hit")
("held" "hold")
("hurt" "hurt")
("indwelt" "indwell")
("inlaid" "inlay")
("inlet" "inlet")
("inputted" "input")
("input" "input")
("inset" "inset")
("insetted" "inset")
("interwove" "interweave")
("interweaved" "interweave")
("jigsawed" "jigsaw")
("jigsawn" "jigsaw")
("kept" "keep")
("knelt" "kneel")
("kneeled" "kneel")
("knitted" "knit")
("knit" "knit")
("knew" "know")
("known" "know")
("laded" "lade")
("laden" "lade")
("laid" "lay")
("led" "lead")
("leaned" "lean")
("leant" "lean")
("leaped" "leap")
("leapt" "leap")
("learned" "learn")
("learnt" "learn")
("left" "leave")
("lent" "lend")
("let" "let")
("lay" "lie")
("lain" "lie")
("lighted" "light")
("lit" "light")
("lip-read" "lip-read")
("lost" "lose")
("made" "make")
("meant" "mean")
("met" "meet")
("melted" "melt")
("methougt" "methinks")
;; ("-" "methinks")
("misbecame" "misbecome")
("misbecome" "misbecome")
("miscast" "miscast")
("miscasted" "miscast")
("misdealt" "misdeal")
("misdid" "misdo")
("misdone" "misdo")
("misgave" "misgive")
("misgiven" "misgive")
("mishit" "mishit")
("mislaid" "mislay")
("misled" "mislead")
("misread" "misread")
("misspelt" "misspell")
("missplled" "misspell")
("misspent" "misspend")
("mistook" "mistake")
("mistaken" "mistake")
("misunderstood" "misunderstand")
("mowed" "mow")
("mown" "mow")
("offset" "offset")
("outbid" "outbid")
("outbade" "outbid")
("outbidden" "outbid")
("outdid" "outdo")
("outdone" "outdo")
("outfought" "outfight")
("outgrew" "outgrown")
("outgrown" "outgrown")
("outlaid" "outlay")
("output" "output")
("outputted" "output")
("ooutputted" "output")
("outrode" "outride")
("outridden" "outride")
("outran" "outrun")
("outrun" "outrun")
("outsold" "outsell")
("outshone" "outshine")
("outshot" "outshoot")
("outwore" "outwear")
("outworn" "outwear")
("overbore" "overbear")
("overborne" "overbear")
("overbid" "overbid")
("overblew" "overblow")
("overblown" "overblow")
("overcame" "overcome")
("overcome" "overcome")
("overdid" "overdo")
("overdone" "overdo")
("overdrew" "overdraw")
("overdrawn" "overdraw")
("overdrank" "overdrink")
("overdrunk" "overdrink")
("overate" "overeat")
("overeaten" "overeat")
("overfed" "overfeed")
("overflowed" "overflow")
("overflown" "overfly" "overflow")
("overflew" "overfly")
("overgrew" "overgrow")
("overgrown" "overgrow")
("overhung" "overhang")
("overhanged" "overhang")
("ovearheard" "overhear")
("overlaid" "overlay")
("overleaped" "overleap")
("overleapt" "overleap")
("overlay" "overlie")
("overlain" "overlie")
("overpaid" "overpay")
("overrode" "override")
("overridden" "override")
("overran" "overrun")
("overrun" "overrun")
("oversaw" "oversee")
("overseen" "oversee")
("oversold" "oversell")
("overset" "overset")
("overshot" "overshoot")
("overspent" "overspend")
("overspread" "overspread")
("overtook" "overtake")
("overtaken" "overtake")
("overthrew" "overthrow")
("overthrown" "overthrow")
("overworked" "overwork")
("overwrought" "overwork")
("partook" "partake")
("partaken" "partake")
("paid" "pay")
("penned" "pen")
("pent" "pen")
("pinch-hit" "pinch-hit")
("pleaded" "plead")
("plead" "plead")
("pled" "plead")
("prepaid" "prepay")
("preset" "preset")
("proofread" "proofread")
("proved" "prove")
("proven" "prove")
("put" "put")
("quick-froze" "quick-freeze")
("quick-frozen" "quick-freeze")
("quit" "quit")
("quitted" "quit")
("read" "read")
("reaved" "reave")
("reft" "reave")
("rebound" "rebind")
("rebroadcast" "rebroadcast")
("rebroadcasted" "rebroadcast")
("rebuilt" "rebuild")
("recast" "recast")
("recasted" "recast")
("re-did" "re-do")
("re-done" "re-do")
("reeved" "reeve")
("rove" "reeve")
("reheard" "rehear")
("relaid" "relay")
("remade" "remake")
("rent" "rend")
("repaid" "repay")
("reread" "reread")
("reran" "rerun")
("rerun" "rerun")
("resold" "resell")
("reset" "reset")
("retook" "retake")
("retaken" "retake")
("retold" "retell")
("rethought" "rethink")
("rewound" "rewind")
("rewinded" "rewind")
("rewrote" "rewrite")
("rewritten" "rewrite")
("rid" "ride") ;; ("rid" "ride" "rid")
("ridded" "rid")
("rode" "ride")
("ridden" "ride")
("rang" "ring")
("rung" "ring")
("rose" "rise")
("risen" "rise")
("rived" "rive")
("riven" "rive")
("roughcast" "roughcast")
("roughhewed" "roughhew")
("roughhewn" "roughhew")
("ran" "run")
("run" "run")
("sawed" "saw")
("sawn" "saw")
("said" "say")
("saw" "see")
("seen" "see")
("sought" "seek")
("sold" "sell")
("sent" "send")
("set" "set")
("sewed" "sew")
("sewn" "sew")
("shook" "shake")
("shaken" "shake")
("shaved" "shave")
("shaven" "shave")
("sheared" "shear")
("shore" "shear")
("shorn" "shear")
("shed" "shed")
("shone" "shine")
("shined" "shine")
("shit" "shit")
("shat" "shit")
("shitted" "shit")
("shod" "shoe")
("shoed" "shoe")
("shot" "shoot")
("showed" "show")
("shown" "show")
("shredded" "shred")
("shred" "shred")
("shrank" "shrink")
("shrunk" "shrink")
("shrunken" "shrink")
("shrived" "shrive")
("shrove" "shrive")
("shriven" "shrive")
("shut" "shut")
("sight-read" "sight-read")
("simulcast" "simulcast")
("simulcasted" "simulcast")
("sang" "sing")
("sung" "sing")
("sank" "sink")
("sunk" "sink")
("sunken" "sink")
("sat" "sit")
("sate" "sit")
("slew" "slay")
("slain" "slay")
("slept" "sleep")
("slid" "slide")
("slidden" "slide")
("slunk" "slink")
("smelled" "smell")
("smelt" "smell")
("smote" "smite")
("smitten" "smite")
("smit" "smite")
("sowed" "sow")
("sown" "sow")
("spoke" "speak")
("spoken" "speak")
("sped" "speed")
("speeded" "speed")
("spelled" "spell")
("spelt" "spell")
("spellbound" "spellbind")
("spent" "spend")
("spilled" "spill")
("spilt" "spill")
("spun" "spin")
("span" "spin")
("spat" "spit")
("spit" "spit")
("split" "split")
("spoiled" "spoil")
("spoilt" "spoil")
("spoon-fed" "spoon-feed")
("spread" "spread")
("sprang" "spring")
("sprung" "spring")
("stood" "stand")
("staved" "stave")
("stove" "stave")
("stayed" "stay")
("staid" "stay")
("stole" "steal")
("stolen" "steal")
("stuck" "stick")
("stung" "sting")
("stank" "stink")
("stunk" "stink")
("strewed" "strew")
("strewn" "strew")
("strode" "stride")
("stridden" "stride")
("struck" "strike")
("stricken" "strike")
("strung" "string")
("strove" "strive")
("striven" "strive")
("sublet" "sublet")
("sunburned" "sunburn")
("sunburnt" "sunburn")
("swore" "swear")
("sware" "swear")
("sworn" "swear")
("sweat" "sweat")
("sweated" "sweat")
("swept" "sweep")
("swelled" "swell")
("swollen" "swell")
("swam" "swim")
("swum" "swim")
("swung" "swing")
("took" "take")
("taken" "take")
("taught" "teach")
("tore" "tear")
("torn" "tear")
("telecast" "telecast")
("telecasted" "telecast")
("told" "tell")
("thought" "think")
("thrived" "thrive")
("throve" "thrive")
("thriven" "thrive")
("threw" "thrown")
("thrown" "thrown")
("thrust" "thrust")
("tossed" "toss")
("tost" "toss")
("trod" "tread")
("treaded" "tread")
("trode" "tread")
("trodden" "tread")
("typecast" "typecast")
("typewrote" "typewrite")
("typewritten" "typewrite")
("unbent" "unbend")
("unbended" "unbend")
("unbound" "unbind")
("underbid" "underbid")
("underbidden" "underbid")
("undercut" "undercut")
("underwent" "undergo")
("undergone" "undergo")
("underlaid" "underlay")
("underlay" "underlie")
("underlain" "underlie")
("underpaid" "underpay")
("undersold" "undersell")
("undershot" "undershoot")
("understood" "understand")
("undertook" "undertake")
("undertaken" "undertake")
("underwrote" "underwrite")
("underwritten" "underwrite")
("undid" "undo")
("undone" "undo")
("undrew" "undraw")
("undrawn" "undraw")
("ungirded" "ungird")
("ungirt" "ungird")
("unlearnt" "unlearn")
("unlearned" "unlearn")
("unmade" "unmake")
("unsaid" "unsay")
("unstuck" "unstick")
("unstrung" "unstring")
("unwound" "unwind")
("upheld" "uphold")
("uprose" "uprise")
("uprisen" "uprise")
("upset" "upset")
("upswept" "upsweep")
("woke" "wake")
("waked" "wake")
("woken" "wake")
("waylaid" "waylay")
("wore" "wear")
("worn" "wear")
("wove" "weave")
("weaved" "weave")
("woven" "weave")
("wed" "wed")
("wedded" "wed")
("wept" "weep")
("wended" "wend")
("wetted" "wet")
("wet" "wet")
("won" "win")
("wound" "wind")
("winded" "wind")
("wiredrew" "wiredraw")
("wiredrawn" "wiredraw")
("wist" "wit")
("withdrew" "withdraw")
("withdrawn" "withdraw")
("withheld" "withhold")
("withstood" "withstand")
("worked" "work")
("wrought" "work")
("wrapped" "wrap")
("wrapt" "wrap")
("wrung" "wring")
("wrote" "write")
("writ" "write")
("written" "write"))
(defun stem:extra (str) "\
(or (assoc str stem:irregular-verb-alist)
(let (c l stem)
(setq l (cond
;; 比較級/最上級
((stem:match "\\([^aeiou]\\)\\1e\\(r\\|st\\)$")
(list (substring str (match-beginning 1) (match-end 1))
(substring str (match-beginning 0) (match-beginning 2))))
((stem:match "\\([^aeiou]\\)ie\\(r\\|st\\)$")
(setq c (substring str (match-beginning 1) (match-end 1)))
(list c (concat c "y") (concat c "ie")))
((stem:match "e\\(r\\|st\\)$") '("" "e"))
;; 3単現/複数形
((stem:match "ches$") '("ch" "che"))
((stem:match "shes$") '("sh" "che"))
((stem:match "ses$") '("s" "se"))
((stem:match "xes$") '("x" "xe"))
((stem:match "zes$") '("z" "ze"))
((stem:match "ves$") '("f" "fe"))
((stem:match "\\([^aeiou]\\)oes$")
(setq c (substring str -4 -3))
(list c (concat c "o") (concat c "oe")))
((stem:match "\\([^aeiou]\\)ies$")
(setq c (substring str -4 -3))
(list c (concat c "y") (concat c "ie")))
((stem:match "es$") '("" "e"))
((stem:match "s$") '(""))
;; 過去形/過去分詞
((stem:match "\\([^aeiou]\\)ied$")
(setq c (substring str -4 -3))
(list c (concat c "y") (concat c "ie")))
((stem:match "\\([^aeiou]\\)\\1ed$")
(list (substring str -4 -3)
(substring str -4 -1)))
((stem:match "cked$") '("c" "cke"))
((stem:match "ed$") '("" "e"))
;; 現在分詞
((stem:match "\\([^aeiou]\\)\\1ing$")
(list (substring str -5 -4)))
((stem:match "ing$") '("" "e"))
(append (mapcar (lambda (s) (concat stem s)) l)
(list str))
;;; Public functions / 公開関数
(defun stem:stripping-suffix (str) "\
(delq nil (let ((w ""))
(function (lambda (x) (if (string= x w) nil (setq w x))))
(sort (append
;; 大文字を小文字に変換
(list (prog1 str (setq str (downcase str))))
;; 独自のヒューリスティックスを適用
(stem:extra str)
(if (> (length str) stem:minimum-word-length)
;; 単語長が条件を満たせば、Porter のアルゴリズムを適用
(lambda (func)
(setq str (funcall func str)))
'(stem:step1 stem:step2 stem:step3 stem:step4 stem:step5))))
(defun stem-english (str) "\
(sort (stem:stripping-suffix str)
(function (lambda (a b) (< (length a) (length b))))))
;; この stem-english の動作は、
;; Id: stem.el,v 1.4 1998/11/30 09:27:27 tsuchiya Exp tsuchiya
;; 以前のバージョンの stem.el で定義されていた stem:stripping-suffix
;; の動作と互換である。現在の stem:stripping-suffix は辞書順のリストを
;; 返すため、異なる動作とするようになっているので注意すること。
;;; Porter のアルゴリズムを適用する関数
(defun stem:stripping-inflection (word) "\
Porter のアルゴリズムに基づいて派生語を処理する関数"
(stem:step1 word)))))))
(provide 'stem)
;; Local Variables:
;; coding: utf-8
;; mode: emacs-lisp
;; End:
;;; stem.el ends here