Skip to content

Commit

Permalink
Merge pull request #5235 from sellout/refactor-transcript-parser
Browse files Browse the repository at this point in the history
  • Loading branch information
aryairani committed Jul 19, 2024
2 parents 8e469ad + b1cf123 commit ef62cd9
Show file tree
Hide file tree
Showing 9 changed files with 356 additions and 365 deletions.
50 changes: 50 additions & 0 deletions unison-cli/src/Unison/Codebase/Transcript.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{-# LANGUAGE PatternSynonyms #-}

-- | The data model for Unison transcripts.
module Unison.Codebase.Transcript
( ExpectingError,
ScratchFileName,
Hidden (..),
UcmLine (..),
UcmContext (..),
APIRequest (..),
pattern CMarkCodeBlock,
Stanza,
ProcessedBlock (..),
)
where

import CMark qualified
import Unison.Core.Project (ProjectBranchName, ProjectName)
import Unison.Prelude
import Unison.Project (ProjectAndBranch)

type ExpectingError = Bool

type ScratchFileName = Text

data Hidden = Shown | HideOutput | HideAll
deriving (Eq, Show)

data UcmLine
= UcmCommand UcmContext Text
| -- | Text does not include the '--' prefix.
UcmComment Text

-- | Where a command is run: a project branch (myproject/mybranch>).
data UcmContext
= UcmContextProject (ProjectAndBranch ProjectName ProjectBranchName)

data APIRequest
= GetRequest Text
| APIComment Text

pattern CMarkCodeBlock :: (Maybe CMark.PosInfo) -> Text -> Text -> CMark.Node
pattern CMarkCodeBlock pos info body = CMark.Node pos (CMark.CODE_BLOCK info body) []

type Stanza = Either CMark.Node ProcessedBlock

data ProcessedBlock
= Ucm Hidden ExpectingError [UcmLine]
| Unison Hidden ExpectingError (Maybe ScratchFileName) Text
| API [APIRequest]
166 changes: 166 additions & 0 deletions unison-cli/src/Unison/Codebase/Transcript/Parser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
-- | Parse and print CommonMark (like Github-flavored Markdown) transcripts.
module Unison.Codebase.Transcript.Parser
( -- * printing
formatAPIRequest,
formatUcmLine,
formatStanza,
formatNode,
formatProcessedBlock,

-- * conversion
processedBlockToNode,

-- * parsing
stanzas,
ucmLine,
apiRequest,
fenced,
hidden,
expectingError,
language,
)
where

import CMark qualified
import Data.Char qualified as Char
import Data.Text qualified as Text
import Data.These (These (..))
import Text.Megaparsec qualified as P
import Unison.Codebase.Transcript
import Unison.Prelude
import Unison.Project (ProjectAndBranch (ProjectAndBranch))

formatAPIRequest :: APIRequest -> Text
formatAPIRequest = \case
GetRequest txt -> "GET " <> txt
APIComment txt -> "-- " <> txt

formatUcmLine :: UcmLine -> Text
formatUcmLine = \case
UcmCommand context txt -> formatContext context <> "> " <> txt
UcmComment txt -> "--" <> txt
where
formatContext (UcmContextProject projectAndBranch) = into @Text projectAndBranch

formatStanza :: Stanza -> Text
formatStanza = either formatNode formatProcessedBlock

formatNode :: CMark.Node -> Text
formatNode = (<> "\n") . CMark.nodeToCommonmark [] Nothing

formatProcessedBlock :: ProcessedBlock -> Text
formatProcessedBlock = formatNode . processedBlockToNode

processedBlockToNode :: ProcessedBlock -> CMark.Node
processedBlockToNode = \case
Ucm _ _ cmds -> CMarkCodeBlock Nothing "ucm" $ foldr ((<>) . formatUcmLine) "" cmds
Unison _hide _ fname txt ->
CMarkCodeBlock Nothing "unison" $ maybe txt (\fname -> Text.unlines ["---", "title: " <> fname, "---", txt]) fname
API apiRequests -> CMarkCodeBlock Nothing "api" $ Text.unlines $ formatAPIRequest <$> apiRequests

type P = P.Parsec Void Text

stanzas :: FilePath -> Text -> Either (P.ParseErrorBundle Text Void) [Stanza]
stanzas srcName = (\(CMark.Node _ _DOCUMENT blocks) -> traverse stanzaFromNode blocks) . CMark.commonmarkToNode []
where
stanzaFromNode :: CMark.Node -> Either (P.ParseErrorBundle Text Void) Stanza
stanzaFromNode node = case node of
CMarkCodeBlock _ info body -> maybe (Left node) pure <$> P.parse (fenced info) srcName body
_ -> pure $ Left node

ucmLine :: P UcmLine
ucmLine = ucmCommand <|> ucmComment
where
ucmCommand :: P UcmLine
ucmCommand = do
context <-
P.try do
contextString <- P.takeWhile1P Nothing (/= '>')
context <-
case (tryFrom @Text contextString) of
(Right (These project branch)) -> pure (UcmContextProject (ProjectAndBranch project branch))
_ -> fail "expected project/branch or absolute path"
void $ lineToken $ word ">"
pure context
line <- P.takeWhileP Nothing (/= '\n') <* spaces
pure $ UcmCommand context line

ucmComment :: P UcmLine
ucmComment = do
word "--"
line <- P.takeWhileP Nothing (/= '\n') <* spaces
pure $ UcmComment line

apiRequest :: P APIRequest
apiRequest = do
apiComment <|> getRequest
where
getRequest = do
word "GET"
spaces
path <- P.takeWhile1P Nothing (/= '\n')
spaces
pure (GetRequest path)
apiComment = do
word "--"
comment <- P.takeWhileP Nothing (/= '\n')
spaces
pure (APIComment comment)

-- | Produce the correct parser for the code block based on the provided info string.
fenced :: Text -> P (Maybe ProcessedBlock)
fenced info = do
body <- P.getInput
P.setInput info
fenceType <- lineToken (word "ucm" <|> word "unison" <|> word "api" <|> language)
case fenceType of
"ucm" -> do
hide <- hidden
err <- expectingError
P.setInput body
pure . Ucm hide err <$> (spaces *> many ucmLine)
"unison" ->
do
-- todo: this has to be more interesting
-- ```unison:hide
-- ```unison
-- ```unison:hide:all scratch.u
hide <- lineToken hidden
err <- lineToken expectingError
fileName <- optional untilSpace1
P.setInput body
pure . Unison hide err fileName <$> (spaces *> P.getInput)
"api" -> do
P.setInput body
pure . API <$> (spaces *> many apiRequest)
_ -> pure Nothing

word :: Text -> P Text
word txt = P.try $ do
chs <- P.takeP (Just $ show txt) (Text.length txt)
guard (chs == txt)
pure txt

lineToken :: P a -> P a
lineToken p = p <* nonNewlineSpaces

nonNewlineSpaces :: P ()
nonNewlineSpaces = void $ P.takeWhileP Nothing (\ch -> ch == ' ' || ch == '\t')

hidden :: P Hidden
hidden =
(HideAll <$ word ":hide:all")
<|> (HideOutput <$ word ":hide")
<|> pure Shown

expectingError :: P ExpectingError
expectingError = isJust <$> optional (word ":error")

untilSpace1 :: P Text
untilSpace1 = P.takeWhile1P Nothing (not . Char.isSpace)

language :: P Text
language = P.takeWhileP Nothing (\ch -> Char.isDigit ch || Char.isLower ch || ch == '_')

spaces :: P ()
spaces = void $ P.takeWhileP (Just "spaces") Char.isSpace
Loading

0 comments on commit ef62cd9

Please sign in to comment.