-
Notifications
You must be signed in to change notification settings - Fork 47
/
Instance.hs
298 lines (273 loc) · 10.8 KB
/
Instance.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
-- |
-- Copyright: (C) 2013 Amgen, Inc.
--
-- Interaction with an instance of R. The interface in this module allows for
-- instantiating an arbitrary number of concurrent R sessions, even though
-- currently the R library only allows for one global instance, for forward
-- compatibility.
--
-- The 'R' monad defined here serves to give static guarantees that an instance
-- is only ever used after it has been initialized and before it is finalized.
-- Doing otherwise should result in a type error. This is done in the same way
-- that the 'Control.Monad.ST' monad encapsulates side effects: by assigning
-- a rank-2 type to the only run function for the monad.
--
-- This module is intended to be imported qualified.
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ImpredicativeTypes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE RecursiveDo #-}
{-# LANGUAGE TypeFamilies #-}
module Language.R.Instance
( -- * The R monad
R
, runRegion
, unsafeRunRegion
-- * R instance creation
, Config(..)
, defaultConfig
, withEmbeddedR
, initialize
, finalize
) where
import Control.Monad ((<=<), unless, when, zipWithM_)
import Control.Monad.Primitive (PrimMonad(..))
import Control.Monad.R.Class
import Control.Monad.ST.Unsafe (unsafeSTToIO)
import qualified Data.Semigroup as Sem
import Data.Monoid
import Data.Default.Class (Default(..))
import qualified Foreign.R as R
import qualified Foreign.R.Embedded as R
#ifndef mingw32_HOST_OS
import qualified Foreign.R.EventLoop as R
#endif
import Foreign.C.String
import Language.R.Globals
import Control.Applicative
import Control.Concurrent.MVar
( newMVar
, withMVar
, MVar
)
import Control.DeepSeq ( NFData, deepseq )
import Control.Exception
( bracket
, bracket_
, uninterruptibleMask_
)
import Control.Monad.Catch ( MonadCatch, MonadMask, MonadThrow )
#if MIN_VERSION_base(4,9,0)
import Control.Monad.Fail
#endif
import Control.Monad.Reader
import Data.IORef (IORef, newIORef, readIORef, modifyIORef')
import Foreign
( Ptr
, allocaArray
)
import Foreign.C.Types ( CInt(..) )
import Foreign.Storable (Storable(..))
import System.Environment ( getProgName, lookupEnv )
import System.IO.Unsafe ( unsafePerformIO )
import System.Process ( readProcess )
import System.SetEnv
#ifndef mingw32_HOST_OS
import Control.Exception ( onException )
import System.IO ( hPutStrLn, stderr )
import System.Posix.Resource
#endif
import Prelude
-- | The 'R' monad, for sequencing actions interacting with a single instance of
-- the R interpreter, much as the 'IO' monad sequences actions interacting with
-- the real world. The 'R' monad embeds the 'IO' monad, so all 'IO' actions can
-- be lifted to 'R' actions.
newtype R s a = R { unR :: ReaderT (IORef Int) IO a }
deriving (Applicative, Functor, Monad, MonadIO, MonadCatch, MonadMask, MonadThrow)
#if MIN_VERSION_base(4,9,0)
instance MonadFail (R s) where
fail s = R $ ReaderT $ \_ -> Control.Monad.Fail.fail s
#endif
instance PrimMonad (R s) where
type PrimState (R s) = s
primitive f = R $ lift $ unsafeSTToIO $ primitive f
instance MonadR (R s) where
io m = R $ ReaderT $ \_ -> m
acquire s = R $ ReaderT $ \cnt -> uninterruptibleMask_ $ do
x <- R.release <$> R.protect s
modifyIORef' cnt succ
return x
newtype ExecContext (R s) = ExecContext (IORef Int)
getExecContext = R $ ReaderT $ \ref -> return (ExecContext ref)
unsafeRunWithExecContext m (ExecContext ref) = runReaderT (unR m) ref
-- | Initialize a new instance of R, execute actions that interact with the
-- R instance and then finalize the instance. This is typically called at the
-- very beginning of the @main@ function of the program.
--
-- > main = withEmbeddedR $ do {...}
--
-- Note that R does not currently support reinitialization after finalization,
-- so this function should be called only once during the lifetime of the
-- program (see @src/unix/system.c:Rf_initialize()@ in the R source code).
withEmbeddedR :: Config -> IO a -> IO a
withEmbeddedR config = bracket_ (initialize config) finalize
-- | Run an R action in the global R instance from the IO monad. This action
-- provides no static guarantees that the R instance was indeed initialized and
-- has not yet been finalized. Make sure to call it within the scope of
-- `withEmbeddedR`.
--
-- @runRegion m@ fully evaluates the result of action @m@, to ensure that no
-- thunks hold onto resources in a way that would extrude the scope of the
-- region. This means that the result must be first-order data (i.e. not
-- a function).
--
-- @throws@ 'Foreign.R.Error'. Generaly any R function may throw @RError@ that
-- is safe to be cached and computation can proceed. However @RError@ will cancel
-- entire R block. So in order to catch exception in more fine grained way one
-- has to use function @tryCatch@ inside R block.
runRegion :: NFData a => (forall s. R s a) -> IO a
runRegion r = unsafeRunRegion r
unsafeRunRegion :: NFData a => R s a -> IO a
unsafeRunRegion r =
bracket (newIORef 0)
(R.unprotect <=< readIORef)
(\d -> do
x <- runReaderT (unR r) d
x `deepseq` return x)
-- | Configuration options for the R runtime. Configurations form monoids, so
-- arguments can be accumulated left-to-right through monoidal composition.
data Config = Config
{ -- | Program name. If 'Nothing' then the value of 'getProgName' will be
-- used.
configProgName :: Last String
-- | Command-line arguments.
, configArgs :: [String]
-- | Set to 'True' if you're happy to let R install its own signal handlers
-- during initialization. By default R sets following signal handlers:
--
-- * SIGPIPE - ignore signal;
-- * SIGUSR1 - save workspace and terminate program;
-- * SIGUSR2 - terminate program without saving workspace;
-- * SIGINT - cancel execution of the current function.
--
-- *N.B.* When program is terminated, haskell runtime will not have any chances
-- to run any exception handlers or finalizers.
, configSignalHandlers :: Last Bool
}
instance Default Config where
def = defaultConfig
instance Sem.Semigroup Config where
(<>) cfg1 cfg2 = Config
{ configProgName = configProgName cfg1 <> configProgName cfg2
, configArgs = configArgs cfg1 <> configArgs cfg2
, configSignalHandlers = configSignalHandlers cfg1 <> configSignalHandlers cfg2
}
instance Monoid Config where
mempty = defaultConfig
mappend = (<>)
-- | Default argument to pass to 'initialize'.
defaultConfig :: Config
defaultConfig = Config (Last Nothing) ["--vanilla", "--silent"] (Last (Just False))
-- | Populate environment with @R_HOME@ variable if it does not exist and
-- @R_LIBS@ variable if it doesn't exist either.
populateEnv :: IO ()
populateEnv = do
mh <- lookupEnv "R_HOME"
when (mh == Nothing) $
setEnv "R_HOME" =<< fmap (head . lines) (readProcess "R" ["-e","cat(R.home())","--quiet","--slave"] "")
ml <- lookupEnv "R_LIBS"
when (ml == Nothing) $
setEnv "R_LIBS" =<< fmap (head . lines) (readProcess "R" ["-e","cat(.libPaths(),sep=if (.Platform$OS.type == \"unix\") \":\" else \";\")","--quiet","--slave"] "")
-- | A static address that survives GHCi reloadings which indicates
-- whether R has been initialized.
foreign import ccall "missing_r.h &isRInitialized" isRInitializedPtr :: Ptr CInt
-- | Allocate and initialize a new array of elements.
newCArray :: Storable a
=> [a] -- ^ Array elements
-> (Ptr a -> IO r) -- ^ Continuation
-> IO r
newCArray xs k =
allocaArray (length xs) $ \ptr -> do
zipWithM_ (pokeElemOff ptr) [0..] xs
k ptr
-- | An MVar to make an atomic step of checking whether R is initialized and
-- initializing it if needed.
initLock :: MVar ()
initLock = unsafePerformIO $ newMVar ()
{-# NOINLINE initLock #-}
-- Note [Concurrent initialization]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- In 'initialize' we check a first time if R is initialized. This test is fast
-- since it happens without taking an MVar. If R needs initialization, after
-- taking the MVar we check again if R is initialized to avoid concurrent
-- threads from initializing R multiple times. The user is not expected to call
-- initialize multiple times concurrently, but there is nothing stopping the
-- compiler from doing so when compiling quasiquotes.
-- | Create a new embedded instance of the R interpreter. Only works from the
-- main thread of the program. That is, from the same thread of execution that
-- the program's @main@ function is running on. In GHCi, use @-fno-ghci-sandbox@
-- to achieve this.
initialize :: Config -> IO ()
initialize Config{..} = do
#ifndef mingw32_HOST_OS
#if defined(darwin_HOST_OS) || defined(freebsd_HOST_OS)
-- NOTE: OS X and FreeBSD does not allow removing the stack size limit completely,
-- instead forcing a hard limit of just under 64MB.
let stackLimit = ResourceLimit 67104768
#else
let stackLimit = ResourceLimitUnknown
#endif
setResourceLimit ResourceStackSize (ResourceLimits stackLimit stackLimit)
`onException` (hPutStrLn stderr $
"Language.R.Interpreter: "
++ "Cannot increase stack size limit."
++ "Try increasing your stack size limit manually:"
#ifdef darwin_HOST_OS
++ "$ launchctl limit stack 67104768"
++ "$ ulimit -s 65532"
#elif defined(freebsd_HOST_OS)
++ "$ ulimit -s 67104768"
#else
++ "$ ulimit -s unlimited"
#endif
)
#endif
initialized <- fmap (==1) $ peek isRInitializedPtr
-- See note [Concurrent initialization]
unless initialized $ withMVar initLock $ const $ do
initialized2 <- fmap (==1) $ peek isRInitializedPtr
unless initialized2 $ mdo
-- Grab addresses of R global variables
pokeRVariables
( R.baseEnv
, R.emptyEnv
, R.globalEnv
, R.nilValue
, R.unboundValue
, R.missingArg
, R.isRInteractive
, R.signalHandlers
#ifndef mingw32_HOST_OS
, R.inputHandlers
#endif
)
populateEnv
args <- (:) <$> maybe getProgName return (getLast configProgName)
<*> pure configArgs
argv <- mapM newCString args
let argc = length argv
unless (maybe False id $ getLast configSignalHandlers) $
poke signalHandlersPtr 0
newCArray argv $ R.initEmbeddedR argc
poke isRInteractive 0
poke isRInitializedPtr 1
-- | Finalize an R instance.
finalize :: IO ()
finalize = do
R.endEmbeddedR 0
poke isRInitializedPtr 0