Skip to content

Commit 39ef731

Browse files
authored
Add files via upload
1 parent cfa9276 commit 39ef731

File tree

1 file changed

+197
-8
lines changed

1 file changed

+197
-8
lines changed

Diff for: Chapter 04/NLTK Chapter 4 - Exercises.ipynb

+197-8
Original file line numberDiff line numberDiff line change
@@ -3250,31 +3250,220 @@
32503250
},
32513251
{
32523252
"cell_type": "code",
3253-
"execution_count": null,
3253+
"execution_count": 120,
32543254
"metadata": {},
32553255
"outputs": [],
3256-
"source": []
3256+
"source": [
3257+
"def insert(trie, key, value):\n",
3258+
" if key:\n",
3259+
" first, rest = key[0], key[1:]\n",
3260+
" if first not in trie:\n",
3261+
" trie[first] = {}\n",
3262+
" insert(trie[first], rest, value)\n",
3263+
" else:\n",
3264+
" trie['value'] = value"
3265+
]
32573266
},
32583267
{
32593268
"cell_type": "code",
3260-
"execution_count": null,
3269+
"execution_count": 143,
32613270
"metadata": {},
32623271
"outputs": [],
3263-
"source": []
3272+
"source": [
3273+
"trie = {}\n",
3274+
"\n",
3275+
"en = [\"vandalism\", \"vandalize\", \"vane\", \"vanguard\", \"vanilla\", \"vanish\", \n",
3276+
" \"vanity\"]\n",
3277+
"fr = [\"vandalisme\", \"vandaliser\", \"girouette\", \"avant-garde\", \"vanille\", \n",
3278+
" \"disparaître\", \"vanité\"]\n",
3279+
"\n",
3280+
"[insert(trie, e, f) for e, f in zip(en, fr)];"
3281+
]
32643282
},
32653283
{
32663284
"cell_type": "code",
3267-
"execution_count": null,
3285+
"execution_count": 145,
3286+
"metadata": {},
3287+
"outputs": [
3288+
{
3289+
"name": "stdout",
3290+
"output_type": "stream",
3291+
"text": [
3292+
"{'v': {'a': {'n': {'d': {'a': {'l': {'i': {'s': {'m': {'value': 'vandalisme'}},\n",
3293+
" 'z': {'e': {'value': 'vandaliser'}}}}}},\n",
3294+
" 'e': {'value': 'girouette'},\n",
3295+
" 'g': {'u': {'a': {'r': {'d': {'value': 'avant-garde'}}}}},\n",
3296+
" 'i': {'l': {'l': {'a': {'value': 'vanille'}}},\n",
3297+
" 's': {'h': {'value': 'disparaître'}},\n",
3298+
" 't': {'y': {'value': 'vanité'}}}}}}}\n"
3299+
]
3300+
}
3301+
],
3302+
"source": [
3303+
"pprint.pprint(trie, width = 50)"
3304+
]
3305+
},
3306+
{
3307+
"cell_type": "code",
3308+
"execution_count": 375,
32683309
"metadata": {},
32693310
"outputs": [],
3270-
"source": []
3311+
"source": [
3312+
"# original\n",
3313+
"\n",
3314+
"def lookup(trie, word):\n",
3315+
" \n",
3316+
" if len(word) == 1:\n",
3317+
" return trie[word] \n",
3318+
" first, rest = word[0], word[1:]\n",
3319+
" if first not in trie:\n",
3320+
" return False\n",
3321+
" return lookup(trie[first], rest)\n"
3322+
]
32713323
},
32723324
{
32733325
"cell_type": "code",
3274-
"execution_count": null,
3326+
"execution_count": 373,
32753327
"metadata": {},
32763328
"outputs": [],
3277-
"source": []
3329+
"source": [
3330+
"# trial \n",
3331+
"\n",
3332+
"def lookup(trie, word):\n",
3333+
" \n",
3334+
" if len(word) == 1:\n",
3335+
" if 'value' in trie[word]:\n",
3336+
" \n",
3337+
" return trie[word]\n",
3338+
" \n",
3339+
" first, rest = word[0], word[1:]\n",
3340+
" if first not in trie:\n",
3341+
" return False\n",
3342+
" return lookup(trie[first], rest)\n"
3343+
]
3344+
},
3345+
{
3346+
"cell_type": "code",
3347+
"execution_count": 354,
3348+
"metadata": {},
3349+
"outputs": [],
3350+
"source": [
3351+
"# trial 2\n",
3352+
"\n",
3353+
"def lookup(trie, word):\n",
3354+
" \n",
3355+
" if len(word) == 1:\n",
3356+
" if 'value' in trie[word]:\n",
3357+
" \n",
3358+
" return trie[word]\n",
3359+
" else:\n",
3360+
" suffix = list(lookup(trie, word).keys())\n",
3361+
" for s in suffix:\n",
3362+
" return lookup(trie[word], s)\n",
3363+
" first, rest = word[0], word[1:]\n",
3364+
" if first not in trie:\n",
3365+
" return False\n",
3366+
" return lookup(trie[first], rest)\n"
3367+
]
3368+
},
3369+
{
3370+
"cell_type": "code",
3371+
"execution_count": 321,
3372+
"metadata": {},
3373+
"outputs": [
3374+
{
3375+
"name": "stdout",
3376+
"output_type": "stream",
3377+
"text": [
3378+
"{'l': {'i': {'s': {'m': {'value': 'vandalisme'}}, 'z': {'e': {'value': 'vandaliser'}}}}}\n"
3379+
]
3380+
}
3381+
],
3382+
"source": [
3383+
"for k, v in lookup(trie, 'vand').items():\n",
3384+
" print(v)"
3385+
]
3386+
},
3387+
{
3388+
"cell_type": "code",
3389+
"execution_count": 312,
3390+
"metadata": {},
3391+
"outputs": [
3392+
{
3393+
"data": {
3394+
"text/plain": [
3395+
"['a']"
3396+
]
3397+
},
3398+
"execution_count": 312,
3399+
"metadata": {},
3400+
"output_type": "execute_result"
3401+
}
3402+
],
3403+
"source": [
3404+
"#test = lookup(trie, 'vand')\n",
3405+
"list(trie['v']['a']['n']['d'].keys())\n"
3406+
]
3407+
},
3408+
{
3409+
"cell_type": "code",
3410+
"execution_count": 351,
3411+
"metadata": {},
3412+
"outputs": [
3413+
{
3414+
"data": {
3415+
"text/plain": [
3416+
"['u']"
3417+
]
3418+
},
3419+
"execution_count": 351,
3420+
"metadata": {},
3421+
"output_type": "execute_result"
3422+
}
3423+
],
3424+
"source": [
3425+
"list(lookup(trie, 'vang').keys())"
3426+
]
3427+
},
3428+
{
3429+
"cell_type": "code",
3430+
"execution_count": 352,
3431+
"metadata": {},
3432+
"outputs": [
3433+
{
3434+
"data": {
3435+
"text/plain": [
3436+
"{'a': {'r': {'d': {'value': 'avant-garde'}}}}"
3437+
]
3438+
},
3439+
"execution_count": 352,
3440+
"metadata": {},
3441+
"output_type": "execute_result"
3442+
}
3443+
],
3444+
"source": [
3445+
"lookup(trie['v']['a']['n']['g'], 'u')"
3446+
]
3447+
},
3448+
{
3449+
"cell_type": "code",
3450+
"execution_count": 339,
3451+
"metadata": {},
3452+
"outputs": [
3453+
{
3454+
"data": {
3455+
"text/plain": [
3456+
"[('value', 'vandalisme')]"
3457+
]
3458+
},
3459+
"execution_count": 339,
3460+
"metadata": {},
3461+
"output_type": "execute_result"
3462+
}
3463+
],
3464+
"source": [
3465+
"list(lookup(trie, 'vandalism').items())"
3466+
]
32783467
},
32793468
{
32803469
"cell_type": "code",

0 commit comments

Comments
 (0)