# Probabilistic Parser Combinators

In [7]:
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Char
import qualified Data.Text as T
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler.Strict
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Enumerator
import Control.Monad.Bayes.Inference.SMC
import Control.Monad.Trans (lift)
import Control.Monad (join, replicateM)
import Data.Void
import Control.Monad.Bayes.Enumerator
import Text.Pretty.Simple

:e OverloadedStrings
:e FlexibleContexts
:e GADTs
:e LambdaCase

Probability interfaces nicely with parser combinators from libraries like `megaparsec`. A parser in this setting is roughly a function `[Char] -> m (a, [Char])`: in other words, a function that (monadically) strips a prefix off the input string and returns a result together with the remaining input.

To make this probabilistic, we simply set `m` to a probability monad. The result of parsing is then a distribution over possible parses (and possible parse failures).


In [52]:
-- | Noisy channel for a one-letter string.
--
-- Flips a fair coin in the underlying probability monad. On heads the input
-- is replaced by a single-letter string drawn uniformly from @'a'..'z'@;
-- on tails the input @x@ is returned unchanged.
noise x = lift (bernoulli 0.5) >>= \case
    True  -> lift $ uniformD [[c] | c <- ['a' .. 'z']]
    False -> return x

-- | Parse one letter of the input under a noisy-observation model.
--
-- A latent "true" letter is drawn uniformly from @h, e, l, o, x@, passed
-- through 'noise' to obtain a predicted observation, and the model is
-- conditioned on that prediction matching the next token of the input.
-- The observed token is then consumed and the latent letter is returned.
--
-- The latent letter is drawn as a 'Char' (rather than a one-character string
-- later unpacked with the partial 'head'), so the function is total.
letter = do
    true <- lift $ uniformD ['h', 'e', 'l', 'o', 'x']
    -- 'noise' works on one-letter strings, so wrap the character.
    predicted <- noise [true]
    -- Peek at the next token without consuming it; only these five letters
    -- are accepted.
    observed <- lookAhead ("h" <|> "e" <|> "l" <|> "o" <|> "x")
    lift $ condition $ predicted == observed
    -- Actually consume the token that was peeked at above.
    _ <- string observed
    return true
    
-- | Parse a whole word: one or more 'letter's followed by end of input.
--
-- After the letters are read, the weight of the run is multiplied by 10 when
-- the inferred word is in the small dictionary and by 1 otherwise, so
-- dictionary words are favoured.
--
-- NOTE(review): 'letter' only ever returns one of @h, e, l, o, x@, so the
-- "goodbye" entry looks unreachable with the current alphabet — confirm.
word = scored <* eof
  where
    scored = do
        letters <- some letter
        lift . factor $ if letters `elem` ["hello", "goodbye"] then 10 else 1
        return letters

-- | 'errorBundlePretty' with the custom-error type parameter fixed to 'Void'.
--
-- Only the type is narrowed; the rendering is delegated unchanged.
errorBundlePretty' :: (TraversableStream s, VisualStream s) => ParseErrorBundle s Void -> String
errorBundlePretty' bundle = errorBundlePretty bundle


-- | Run @parser@ on @input@ (with an empty source name) in the underlying
-- monad and render the outcome as 'T.Text': a parse failure is rendered with
-- 'errorBundlePretty'', a success with 'show'.
run parser input = fmap render (runParserT parser "" input)
  where
    render = T.pack . either errorBundlePretty' show

In [None]:
-- Infer the word behind the input "hxllo" with SMC (5 steps, 10000
-- particles, multinomial resampling), then pretty-print the empirical
-- weighted distribution over rendered parse results.
x <- sampler . population
    . smc SMCConfig {numSteps = 5, numParticles = 10000, resampler = resampleMultinomial}
    $ run word "hxllo"
pPrint (toEmpiricalWeighted x)

In [4]:
import Debug.Trace

-- | Run @parser@ twice, weight the run by the pair of results, and return
-- both results as a two-element list.
--
-- The pair @('h', 'g')@ gets weight 1; every other pair gets weight 0.5.
--
-- NOTE(review): @parser@ is not defined in the visible part of this
-- notebook — confirm where it is meant to come from.
two = do
    first <- parser
    second <- parser
    lift . factor $ pairWeight first second
    return [first, second]
  where
    pairWeight 'h' 'g' = 1
    pairWeight _ _ = 0.5
    
-- | Flip a fair coin in the underlying probability monad, then parse the
-- literal "h" on heads or the literal "g" on tails.
--
-- Earlier experiments kept for reference:
--   x <- parser
--   char x
--   lift $ factor $ case (x,y) of ('h','g') -> 1; _ -> 0.5
--   return x
duplicate =
    lift (bernoulli 0.5) >>= \heads ->
        if heads then "h" else "g"
    


In [5]:
-- Run `duplicate` on the input "hg" under SMC (2 steps, 100 particles,
-- multinomial resampling) and pretty-print the empirical weighted
-- distribution over rendered parse results.
--
-- Uses the same `sampler` / `population` / `smc SMCConfig` entry points as
-- the "hxllo" cell, instead of `sampleIO` / `runPopulation` /
-- `smcMultinomial`, so every cell targets one monad-bayes API.
x <- sampler $ population
    $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial}
    $ run duplicate "hg"
pPrint $ toEmpiricalWeighted x

: 

In [6]:
-- | Draw a character uniformly from @'a'@ and @'b'@, condition on it being
-- @'a'@, then parse exactly that character from the input.
--
-- Alternatives tried earlier, kept for reference:
--   char =<< lift (uniformD ['a', 'b'])
--   lift $ uniformD ['a', 'b']
p = do
    candidate <- lift $ uniformD ['a', 'b']
    lift . condition $ candidate == 'a'
    char candidate

-- Run `p` on the input "aa" under SMC (2 steps, 100 particles, multinomial
-- resampling) and pretty-print the empirical weighted distribution over
-- rendered parse results.
--
-- Uses the same `sampler` / `population` / `smc SMCConfig` entry points as
-- the "hxllo" cell, instead of `sampleIO` / `runPopulation` /
-- `smcMultinomial`, so every cell targets one monad-bayes API.
x <- sampler $ population
    $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial}
    $ run p "aa"
pPrint $ toEmpiricalWeighted x

: 