# Probabilistic Parser Combinators

In [None]:
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Char
import qualified Data.Text as T
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler.Strict
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Inference.SMC
import Control.Monad.Trans (lift)
import Control.Monad (join, replicateM)
import Data.Void
import Text.Pretty.Simple
:e OverloadedStrings
:e FlexibleContexts
:e GADTs
:e LambdaCase

Probability interfaces nicely with parser combinators from libraries like `megaparsec`. A parser in this setting is roughly a function `[Char] -> m (a, [Char])`: a function that (monadically) strips a prefix off the input string and returns a result together with the remaining input.

To make this probabilistic, we simply set `m` to a probability monad. The result of parsing is then a distribution over possible parses (and possible parse failures).


In [10]:
-- Noisy observation channel over the two tokens "g" and "h".
-- A true "g" is reported as "g" or "h", chosen uniformly; a true "h" is
-- always reported as "h". Only these two inputs are matched.
noise "g" = lift (uniformD ["g", "h"])
noise "h" = return "h"

-- One step of the noisy-channel parser: guess the true letter, push it
-- through `noise`, and keep only the runs whose prediction equals the next
-- token of the input. Returns the guessed true letter as a Char.
parser = do
    -- Sample the letter as a Char so that no partial `head` is needed to
    -- return it; `noise` still receives it as a one-character string.
    true <- lift $ uniformD "gh"
    predicted <- noise [true]
    -- Peek at the next token ("g" or "h") without consuming it.
    observed <- lookAhead ("g" <|> "h")
    lift $ condition $ predicted == observed
    -- Now consume the token that was peeked at; its value is already known.
    _ <- string observed
    return true

-- `errorBundlePretty` restricted to bundles whose custom-error component is
-- `Void` (presumably so the error type is pinned down where `run` uses it).
errorBundlePretty' :: (VisualStream s, TraversableStream s) => ParseErrorBundle s Void -> String
errorBundlePretty' bundle = errorBundlePretty bundle

-- foo :: MonadInfer m => m T.Text

-- Run `parser` on `input` (with an empty source name) in the underlying monad
-- and render the outcome as Text: the pretty-printed error bundle on failure,
-- `show` of the parsed value on success.
run parser input = T.pack . either errorBundlePretty' show <$> runParserT parser "" input

In [11]:
-- `toEmpiricalWeighted` comes from Control.Monad.Bayes.Enumerator; import it
-- here so this cell does not depend on a later cell having been run first.
import Control.Monad.Bayes.Enumerator (toEmpiricalWeighted)

-- Parse "hh" as one or more `parser` steps followed by end of input, using
-- SMC with 2 steps, 100 particles and multinomial resampling.
x <- sampleIO $ runPopulation $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial} $ run (some parser <* eof) "hh"
-- Pretty-print the weighted results after passing them through
-- `toEmpiricalWeighted`.
pPrint $ toEmpiricalWeighted x

: 

In [4]:
import Debug.Trace

-- Run `parser` twice in sequence and reweight the pair of results: the pair
-- ('h','g') gets factor 1, every other pair gets factor 0.5.
-- Returns the two results as a list.
two = do
    first <- parser
    second <- parser
    let score = if (first, second) == ('h', 'g') then 1 else 0.5
    lift (factor score)
    return [first, second]
    
-- Flip a fair coin, then run the literal parser it selects:
-- "h" when the coin is True, "g" when it is False.
duplicate =
    lift (bernoulli 0.5) >>= \case
        True -> "h"
        False -> "g"
    -- Earlier experiments, kept for reference:
    -- x <- parser
    -- char x
    -- lift $ factor $ case (x,y) of ('h','g') -> 1; _ -> 0.5
    -- return x
    


In [7]:
import Control.Monad.Bayes.Enumerator

-- Parse "hg" with `duplicate` under SMC (2 steps, 100 particles, multinomial
-- resampling). Uses the `smc` / `SMCConfig` interface, as the "hh" run does,
-- instead of `smcMultinomial`.
x <- sampleIO $ runPopulation $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial} $ run duplicate "hg"
-- Pretty-print the weighted results after passing them through
-- `toEmpiricalWeighted`.
pPrint $ toEmpiricalWeighted x

[
    ( ""h""
    , 0.7000000000000001
    )
,
    ( "1:1:
        |
      1 | hg
        | ^
      unexpected 'h'
      expecting 'g'
      "
    , 0.3
    )
]

In [28]:
-- Draw 'a' or 'b' uniformly, condition on the draw being 'a', then parse
-- that character from the input.
p = do
    letter <- lift $ uniformD ['a', 'b']
    lift . condition $ letter == 'a'
    char letter
    -- Alternatives tried earlier:
    -- char =<< lift (uniformD ['a', 'b'])
    -- lift $ uniformD ['a', 'b']

-- Parse "aa" with `p` under SMC (2 steps, 100 particles, multinomial
-- resampling). Uses the `smc` / `SMCConfig` interface, as the "hh" run does,
-- instead of `smcMultinomial`.
x <- sampleIO $ runPopulation $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial} $ run p "aa"
-- Pretty-print the weighted results after passing them through
-- `toEmpiricalWeighted`.
pPrint $ toEmpiricalWeighted x

[
    ( "'a'"
    , 1.0
    )
]

In [90]:
-- Noisy channel keyed on a Bool: True yields "g" or "h", chosen uniformly;
-- False always yields "h".
noise True = lift (uniformD ["g", "h"])
noise False = return "h"

-- One step of the Bool-state parser: draw the hidden Bool from
-- `bernoulli 0.9`, turn it into a predicted token with `noise`, and keep only
-- the runs whose prediction equals the next input token.
-- Returns the hidden Bool.
parser = do
    latent <- lift (bernoulli 0.9)
    guess <- noise latent
    -- Peek at the next token ("g" or "h") without consuming it.
    next <- lookAhead ("g" <|> "h")
    lift (condition (guess == next))
    -- Consume the token that was peeked at.
    _ <- string next
    return latent

-- `errorBundlePretty` restricted to bundles whose custom-error component is
-- `Void` (presumably so the error type is pinned down where `run` uses it).
errorBundlePretty' :: (VisualStream s, TraversableStream s) => ParseErrorBundle s Void -> String
errorBundlePretty' bundle = errorBundlePretty bundle


-- Parse "gh" as one or more `parser` steps under SMC (2 steps, 100 particles,
-- multinomial resampling). Uses the `smc` / `SMCConfig` interface, as the
-- "hh" run does, instead of `smcMultinomial`.
x <- sampleIO $ runPopulation $ smc SMCConfig {numSteps = 2, numParticles = 100, resampler = resampleMultinomial} $ run (some parser) "gh"
-- Pretty-print the weighted results after passing them through
-- `toEmpiricalWeighted`.
pPrint $ toEmpiricalWeighted x

[
    ( "[True,True]"
    , 0.85
    )
,
    ( "[True,False]"
    , 0.14999999999999994
    )
]

In [None]:
-- Idea: use a parser to infer the state of the world, i.e. parse a sentence, then infer the state.

In [None]:
ex = do
     