# Probabilistic Parser Combinators

In [1]:
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Char
import qualified Data.Text as T
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler.Strict
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Enumerator
import Control.Monad.Bayes.Inference.SMC
import Control.Monad.Trans (lift)
import Control.Monad (join, replicateM)
import Data.Void
import Control.Monad.Bayes.Enumerator
import Text.Pretty.Simple

:e OverloadedStrings
:e FlexibleContexts
:e GADTs
:e LambdaCase

Probability interfaces nicely with parser combinators from libraries like `megaparsec`. A parser in this setting is roughly a function `[Char] -> m (a, [Char])`; in other words, a function that (monadically) strips a prefix off the input string and returns a result together with the remaining input.

To make this probabilistic, we simply set `m` to a probability monad. The result of parsing is then a distribution over possible parses (and possible parse failures).


In [2]:
-- | The symbols the model knows about, each stored as a one-character string.
alphabet :: [String]
alphabet = ["h", "i", "x"]

-- | Noisy channel over symbols: flips a @bernoulli 0.1@ coin and, when it comes
-- up 'True', discards @x@ in favour of a symbol drawn uniformly from
-- 'alphabet' (which may happen to equal @x@); otherwise returns @x@ unchanged.
noise x =
    lift (bernoulli 0.1) >>= \case
        True -> lift (uniformD alphabet)
        False -> return x

-- | Parse one symbol of 'alphabet' under a noisy-channel model.
--
-- A \"true\" symbol is drawn uniformly from 'alphabet' and passed through
-- 'noise'. The run is then conditioned on the noisy symbol being equal to the
-- symbol actually found at the head of the input, and that symbol is consumed.
-- The value returned is the first character of the /true/ symbol, not of the
-- observed one, so the result is a guess at what was intended.
--
-- Both the prior and the set of accepted input symbols come from 'alphabet',
-- so the generative model and the parser cannot drift apart.
letter = do
    true <- lift $ uniformD alphabet
    predicted <- noise true
    -- Peek at the next input symbol without consuming it, so conditioning
    -- happens before the input position advances.
    observed <- lookAhead (choice (map string alphabet))
    lift $ condition $ predicted == observed
    _ <- string observed
    -- Total replacement for 'head': an empty symbol is reported as a parse
    -- failure instead of crashing with an exception.
    case true of
        c : _ -> return c
        [] -> fail "letter: alphabet contains an empty symbol"
    
-- | Parse the entire input as a non-empty sequence of 'letter's.
--
-- The decoded string is scored with @factor 100@ when it is one of the
-- dictionary words @"hi"@ or @"goodbye"@ and with @factor 1@ otherwise. The
-- parse only succeeds if the letters are immediately followed by end of input.
word = do
    decoded <- some letter
    let score = if decoded `elem` ["hi", "goodbye"] then 100 else 1
    lift $ factor score
    -- Require end of input, then hand back the decoded letters.
    decoded <$ eof

-- | 'errorBundlePretty' with the custom-error component fixed to 'Void'.
-- NOTE(review): presumably this exists so the error type is not left
-- ambiguous where the result of 'runParserT' is rendered — confirm.
errorBundlePretty' :: (TraversableStream s, VisualStream s) => ParseErrorBundle s Void -> String
errorBundlePretty' bundle = errorBundlePretty bundle


-- | Run @parser@ over @input@ (with an empty source name) in the underlying
-- monad and render the outcome as text: a parse failure becomes its
-- pretty-printed error bundle, a success becomes the 'show' of the result.
run parser input =
    fmap (T.pack . either errorBundlePretty' show) (runParserT parser "" input)

In [3]:
-- Run SMC (7 steps, 2000 particles, multinomial resampling) on parsing the
-- observed input "hx" with 'word', then pretty-print the empirical weighted
-- distribution over the rendered parse results.
x <- sampler . population $
    smc
        SMCConfig {numSteps = 7, numParticles = 2000, resampler = resampleMultinomial}
        (run word "hx")
pPrint (toEmpiricalWeighted x)



[
    ( ""hi""
    , 0.7989999999999997
    )
,
    ( ""hx""
    , 0.1815000000000003
    )
,
    ( ""xx""
    , 9.500000000000017 e- 3
    )
,
    ( ""hh""
    , 8.500000000000016 e- 3
    )
,
    ( ""ix""
    , 1.5000000000000033 e- 3
    )
]