{-# LANGUAGE Strict #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NondecreasingIndentation #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -Wno-redundant-bang-patterns #-}
{-# OPTIONS_GHC -Wunused-imports #-}

-- Andreas, Makoto, Francesco 2014-10-15 AIM XX:
-- -O2 does not have any noticable effect on runtime
-- but sabotages cabal repl with -Werror
-- (due to a conflict with --interactive warning)
-- {-# OPTIONS_GHC -O2                      #-}

-- | Structure-sharing serialisation of Agda interface files.

-- -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-
-- NOTE: Every time the interface format is changed the interface
-- version number should be bumped _in the same patch_.
--
-- See 'currentInterfaceVersion' below.
--
-- -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

module Agda.TypeChecking.Serialise
  ( InterfacePrefix
  , Encoded
  , encode
  , encodeFile
  , decode
  , decodeFile
  , decodeInterface
  , deserializeInterface
  , deserializeHashes
  )
  where

import Prelude hiding ( null )

import System.Directory ( createDirectoryIfMissing )
import System.FilePath ( takeDirectory )

import Control.Arrow (second)
import qualified Control.Exception as E
import Control.Monad
import Control.Monad.Reader
import Control.Monad.Trans.Maybe

import Data.Foldable (traverse_)
import Data.Word
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import qualified Data.List as List
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL

import qualified Codec.Compression.Zstd as Z

import qualified Agda.TypeChecking.Monad.Benchmark as Bench

import Agda.TypeChecking.Serialise.Base
import Agda.TypeChecking.Serialise.Instances () --instance only
import Agda.TypeChecking.Monad
import Agda.Utils.Monad ((<*!>))
import Agda.Utils.Hash
import Agda.Utils.IORef
import Agda.Utils.Serialize
import Agda.Utils.VarSet (VarSet)
import Agda.Utils.Impossible
import qualified Agda.Utils.HashTable as H
import qualified Agda.Interaction.Options.ProfileOptions as Profile
import qualified Agda.Utils.MinimalArray.Lifted as AL
import qualified Agda.Utils.MinimalArray.MutableLifted as ML
import qualified Agda.Utils.CompactRegion as Compact

#include "MachDeps.h"

currentInterfaceVersion :: Word64
currentInterfaceVersion :: Hash
currentInterfaceVersion = Hash
20250826 Hash -> Hash -> Hash
forall a. Num a => a -> a -> a
* Hash
10 Hash -> Hash -> Hash
forall a. Num a => a -> a -> a
+ Hash
0

ifaceVersionSize :: Int
ifaceVersionSize :: Int
ifaceVersionSize = SIZEOF_WORD64

ifacePrefixSize :: Int
ifacePrefixSize :: Int
ifacePrefixSize = Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
hashSize Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ifaceVersionSize

type InterfacePrefix =
  ( Hash     -- sourceHash
  , Hash     -- fullHash
  , Word64   -- interface version
  )

-- | Hash-consed encoding of a value.
type Encoded =
  ( Word32             -- index of root Node
  , AL.Array Node
  , AL.Array String
  , AL.Array TL.Text
  , AL.Array T.Text
  , AL.Array Integer
  , AL.Array VarSet
  , AL.Array Double
  )

-- | Encodes something as a hash-consed tuple of arrays. To ensure relocatability, file paths in
--   positions are replaced with module names.
encode :: EmbPrj a => a -> TCM Encoded
encode :: forall a. EmbPrj a => a -> TCM Encoded
encode a
a = do
  collectStats <- ProfileOption -> TCMT IO Bool
forall (m :: * -> *). MonadDebug m => ProfileOption -> m Bool
hasProfileOption ProfileOption
Profile.Serialize
  newD         <- liftIO $ emptyDict collectStats
  root         <- liftIO $ (`runReaderT` newD) $ icode a

  (nodeA, stringA, lTextA, sTextA, integerA, varSetA, doubleA) <-
    Bench.billTo [Bench.Serialization, Bench.Sort] $ liftIO $ do
      (,,,,,,) <$!> toArray (nodeD newD)
               <*!> toArray (stringD newD)
               <*!> toArray (lTextD newD)
               <*!> toArray (sTextD newD)
               <*!> toArray (integerD newD)
               <*!> toArray (varSetD newD)
               <*!> toArray (doubleD newD)

  -- Record reuse statistics.
  whenProfile Profile.Sharing $ do
    statistics "pointers" (termC newD)
  whenProfile Profile.Serialize $ do
    statistics "Integer"     (integerC newD)
    statistics "VarSet"      (varSetC newD)
    statistics "Lazy Text"   (lTextC newD)
    statistics "Strict Text" (sTextC newD)
    statistics "String"      (stringC newD)
    statistics "Double"      (doubleC newD)
    statistics "Node"        (nodeC newD)
    statistics "Shared Term" (termC newD)
    statistics "A.QName"     (qnameC newD)
    statistics "A.Name"      (nameC newD)
  when collectStats $ do
    stats <- map (second fromIntegral) <$> do
      liftIO $ List.sort <$> H.toList (stats newD)
    traverse_ (uncurry tickN) stats

  pure (root, nodeA, stringA, lTextA, sTextA, integerA, varSetA, doubleA)
  where
    toArray :: H.HashTableLU k Word32 -> IO (AL.Array k)
    toArray :: forall k. HashTableLU k Word32 -> IO (Array k)
toArray HashTableLU k Word32
tbl = do
      size <- HashTableLU k Word32 -> IO Int
forall (ks :: * -> * -> *) k (vs :: * -> * -> *) v.
MVector ks k =>
HashTable ks k vs v -> IO Int
H.size HashTableLU k Word32
tbl
      arr <- ML.new size undefined
      H.forAssocs tbl \k
k Word32
v -> Array (PrimState IO) k -> Int -> k -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
Array (PrimState m) a -> Int -> a -> m ()
ML.write Array RealWorld k
Array (PrimState IO) k
arr (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
v) k
k
      ML.unsafeFreeze arr

    statistics :: String -> FreshAndReuse -> TCM ()
    statistics :: String -> FreshAndReuse -> TCMT IO ()
statistics String
kind FreshAndReuse
far = do
      fresh <- IO Word32 -> TCMT IO Word32
forall a. IO a -> TCMT IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Word32 -> TCMT IO Word32) -> IO Word32 -> TCMT IO Word32
forall a b. (a -> b) -> a -> b
$ FreshAndReuse -> IO Word32
getFresh FreshAndReuse
far
#ifdef DEBUG_SERIALISATION
      reused <- liftIO $ getReuse far
#endif
      tickN (kind ++ "  (fresh)") $ fromIntegral fresh
#ifdef DEBUG_SERIALISATION
      tickN (kind ++ " (reused)") $ fromIntegral reused
#endif

-- | Decode a hash-consed value. The result depends on the include path.
decode :: EmbPrj a => Encoded -> MaybeT TCM a
decode :: forall a. EmbPrj a => Encoded -> MaybeT (TCMT IO) a
decode Encoded
enc = do
  mf       <- TCM ModuleToSource -> MaybeT (TCMT IO) ModuleToSource
forall (m :: * -> *) a. Monad m => m a -> MaybeT m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (TCM ModuleToSource -> MaybeT (TCMT IO) ModuleToSource)
-> TCM ModuleToSource -> MaybeT (TCMT IO) ModuleToSource
forall a b. (a -> b) -> a -> b
$ Lens' TCState ModuleToSource -> TCM ModuleToSource
forall (m :: * -> *) a. ReadTCState m => Lens' TCState a -> m a
useTC (ModuleToSource -> f ModuleToSource) -> TCState -> f TCState
Lens' TCState ModuleToSource
stModuleToSource
  includes <- lift $ getIncludeDirs
  arena    <- liftIO $ Compact.new (2 ^ 12) -- 4 KB blocks

  let compactElems :: AL.Array a -> IO (AL.Array a)
      compactElems = (a -> IO a) -> Array a -> IO (Array a)
forall a b. (a -> IO b) -> Array a -> IO (Array b)
AL.traverseIO' (Compact -> a -> IO a
forall a. Compact -> a -> IO a
Compact.add Compact
arena)

  (mf, res) <- tryDecode $ do
    let (r, nodeA, stringA, lTextA, sTextA, integerA, varSetA, doubleA) = enc

    stringA      <- compactElems stringA
    lTextA       <- compactElems lTextA
    sTextA       <- compactElems sTextA
    integerA     <- compactElems integerA
    varSetA      <- compactElems varSetA
    doubleA      <- compactElems doubleA
    filePathMemo <- H.empty
    nodeMemo     <- ML.new (AL.size nodeA) MEEmpty
    modFile      <- newIORef mf
    let dec = IOArray MemoEntry
-> Compact
-> Array Node
-> Array String
-> Array Text
-> Array Text
-> Array Integer
-> Array VarSet
-> Array Double
-> HashTable MVector AbsolutePath MVector AbsolutePath
-> IORef ModuleToSource
-> List1 AbsolutePath
-> Decode
Decode IOArray MemoEntry
nodeMemo Compact
arena Array Node
nodeA Array String
stringA Array Text
lTextA Array Text
sTextA Array Integer
integerA Array VarSet
varSetA
                     Array Double
doubleA HashTable MVector AbsolutePath MVector AbsolutePath
filePathMemo IORef ModuleToSource
modFile List1 AbsolutePath
includes
    res <- runReaderT (value r) dec
    mf  <- readIORef modFile
    pure (mf, res)

  lift $ setTCLens stModuleToSource mf
  pure res
  -- -- "Compact" the interfaces (without breaking sharing) to
  -- -- reduce the amount of memory that is traversed by the
  -- -- garbage collector.
  -- lift $ Bench.billTo [Bench.Deserialization, Bench.Compaction] $
  --   liftIO $ C.getCompact <$!> C.compactWithSharing res

getInterfacePrefix :: Interface -> InterfacePrefix
getInterfacePrefix :: Interface -> InterfacePrefix
getInterfacePrefix Interface
i =
  (Interface -> Hash
iSourceHash Interface
i, Interface -> Hash
iFullHash Interface
i, Hash
currentInterfaceVersion)

-- | Serialize an interface prefix + an encoded interface to a lazy bytestring.
--   The result is lazy in order to avoid copying data when we prepend the
--   prefix to the compressed interface bytestring.
serializeEncodedInterface :: InterfacePrefix -> Encoded -> TCM LB.ByteString
serializeEncodedInterface :: InterfacePrefix -> Encoded -> TCM ByteString
serializeEncodedInterface InterfacePrefix
prefix Encoded
i = do
  let doCompress :: ByteString -> f ByteString
doCompress ByteString
i = ByteString -> f ByteString
forall a. a -> f a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteString -> f ByteString) -> ByteString -> f ByteString
forall a b. (a -> b) -> a -> b
$! Int -> ByteString -> ByteString
Z.compress Int
1 ByteString
i -- we outline this to prevent let-lifting by GHC
      {-# NOINLINE doCompress #-}           -- which could ruin the benchmark timing.
  (prefix, i) <- Account (BenchPhase (TCMT IO))
-> TCMT IO (ByteString, ByteString)
-> TCMT IO (ByteString, ByteString)
forall (m :: * -> *) c.
MonadBench m =>
Account (BenchPhase m) -> m c -> m c
Bench.billTo [BenchPhase (TCMT IO)
Phase
Bench.Serialization, BenchPhase (TCMT IO)
Phase
Bench.BinaryEncode] (TCMT IO (ByteString, ByteString)
 -> TCMT IO (ByteString, ByteString))
-> TCMT IO (ByteString, ByteString)
-> TCMT IO (ByteString, ByteString)
forall a b. (a -> b) -> a -> b
$ IO (ByteString, ByteString) -> TCMT IO (ByteString, ByteString)
forall a. IO a -> TCMT IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (ByteString, ByteString) -> TCMT IO (ByteString, ByteString))
-> IO (ByteString, ByteString) -> TCMT IO (ByteString, ByteString)
forall a b. (a -> b) -> a -> b
$
                   (,) (ByteString -> ByteString -> (ByteString, ByteString))
-> IO ByteString -> IO (ByteString -> (ByteString, ByteString))
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> InterfacePrefix -> IO ByteString
forall a. Serialize a => a -> IO ByteString
serialize InterfacePrefix
prefix IO (ByteString -> (ByteString, ByteString))
-> IO ByteString -> IO (ByteString, ByteString)
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
<*!> Encoded -> IO ByteString
forall a. Serialize a => a -> IO ByteString
serialize Encoded
i
  i <- Bench.billTo [Bench.Serialization, Bench.Compress] $ doCompress i
  pure $! LB.fromStrict prefix <> LB.fromStrict i

-- | Convert ErrorCall-s (which are assumed to be deserialization errors) to
--   MaybeT TCM.
tryDecode :: IO a -> MaybeT TCM a
tryDecode :: forall a. IO a -> MaybeT (TCMT IO) a
tryDecode IO a
act = TCM (Maybe a) -> MaybeT (TCMT IO) a
forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT do
  res <- IO (Either String a) -> TCM (Either String a)
forall a. IO a -> TCMT IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO ((a -> Either String a
forall a b. b -> Either a b
Right (a -> Either String a) -> IO a -> IO (Either String a)
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> IO a
act) IO (Either String a)
-> (ErrorCall -> IO (Either String a)) -> IO (Either String a)
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`E.catch` \(E.ErrorCall String
err) -> Either String a -> IO (Either String a)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String -> Either String a
forall a b. a -> Either a b
Left String
err))
  case res of
    Left String
err -> do
      String -> Int -> String -> TCMT IO ()
forall (m :: * -> *).
MonadDebug m =>
String -> Int -> String -> m ()
reportSLn String
"import.iface" Int
5 (String -> TCMT IO ()) -> String -> TCMT IO ()
forall a b. (a -> b) -> a -> b
$ String
"Error when decoding interface file: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
err
      Maybe a -> TCM (Maybe a)
forall a. a -> TCMT IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe a
forall a. Maybe a
Nothing
    Right a
a -> Maybe a -> TCM (Maybe a)
forall a. a -> TCMT IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe a -> TCM (Maybe a)) -> Maybe a -> TCM (Maybe a)
forall a b. (a -> b) -> a -> b
$ a -> Maybe a
forall a. a -> Maybe a
Just a
a

-- | Decode an interface from a bytestring. We check the stored interface
--   version against the current interface version, but we don't do anything
--   with the hash prefix.
decodeInterface :: ByteString -> MaybeT TCM Interface
decodeInterface :: ByteString -> MaybeT (TCMT IO) Interface
decodeInterface ByteString
bstr = Encoded -> MaybeT (TCMT IO) Interface
forall a. EmbPrj a => Encoded -> MaybeT (TCMT IO) a
decode (Encoded -> MaybeT (TCMT IO) Interface)
-> MaybeT (TCMT IO) Encoded -> MaybeT (TCMT IO) Interface
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ByteString -> MaybeT (TCMT IO) Encoded
deserializeInterface ByteString
bstr

deserializeInterface :: ByteString -> MaybeT TCM Encoded
deserializeInterface :: ByteString -> MaybeT (TCMT IO) Encoded
deserializeInterface ByteString
bstr = do
  let decompressionError :: MaybeT (TCMT IO) a
decompressionError =
        IO a -> MaybeT (TCMT IO) a
forall a. IO a -> MaybeT (TCMT IO) a
tryDecode (IO a -> MaybeT (TCMT IO) a) -> IO a -> MaybeT (TCMT IO) a
forall a b. (a -> b) -> a -> b
$ ErrorCall -> IO a
forall e a. (HasCallStack, Exception e) => e -> IO a
E.throwIO (ErrorCall -> IO a) -> ErrorCall -> IO a
forall a b. (a -> b) -> a -> b
$ String -> ErrorCall
E.ErrorCall String
"decompression error"
  let (ByteString
prefix, ByteString
i) = Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
ifacePrefixSize ByteString
bstr
  ((_, _, ver) :: InterfacePrefix) <- IO InterfacePrefix -> MaybeT (TCMT IO) InterfacePrefix
forall a. IO a -> MaybeT (TCMT IO) a
tryDecode (IO InterfacePrefix -> MaybeT (TCMT IO) InterfacePrefix)
-> IO InterfacePrefix -> MaybeT (TCMT IO) InterfacePrefix
forall a b. (a -> b) -> a -> b
$ ByteString -> IO InterfacePrefix
forall a. Serialize a => ByteString -> IO a
deserialize ByteString
prefix
  if ver /= currentInterfaceVersion then
    decompressionError
  else case Z.decompress i of
    Decompress
Z.Skip         -> MaybeT (TCMT IO) Encoded
forall {a}. MaybeT (TCMT IO) a
decompressionError
    Z.Error String
e      -> MaybeT (TCMT IO) Encoded
forall {a}. MaybeT (TCMT IO) a
decompressionError
    Z.Decompress ByteString
i -> IO Encoded -> MaybeT (TCMT IO) Encoded
forall a. IO a -> MaybeT (TCMT IO) a
tryDecode (IO Encoded -> MaybeT (TCMT IO) Encoded)
-> IO Encoded -> MaybeT (TCMT IO) Encoded
forall a b. (a -> b) -> a -> b
$ ByteString -> IO Encoded
forall a. Serialize a => ByteString -> IO a
deserialize ByteString
i

--------------------------------------------------------------------------------

-- | Encodes an interface. To ensure relocatability file paths in positions are
-- replaced with module names.
-- A hash-consed and compacted interface is returned.
encodeFile :: FilePath -> Interface -> TCM Interface
encodeFile :: String -> Interface -> TCM Interface
encodeFile String
f Interface
i = do
  let prefix :: InterfacePrefix
prefix = Interface -> InterfacePrefix
getInterfacePrefix Interface
i
  iEncoded <- Interface -> TCM Encoded
forall a. EmbPrj a => a -> TCM Encoded
encode Interface
i
  bstr <- serializeEncodedInterface prefix iEncoded
  i <- Bench.billTo [Bench.Deserialization] $
    maybe __IMPOSSIBLE__ pure =<< runMaybeT (decode @Interface iEncoded)

  -- reload interface from the bytestring instead
  -- i <- Bench.billTo [Bench.Deserialization] $
  --     maybe __IMPOSSIBLE__ pure =<<
  --       runMaybeT (decode =<< deserializeInterface (LB.toStrict bstr))

  liftIO $ createDirectoryIfMissing True (takeDirectory f)
  liftIO $ LB.writeFile f bstr
  pure i

decodeFile :: FilePath -> TCM (Maybe Interface)
decodeFile :: String -> TCMT IO (Maybe Interface)
decodeFile String
f = MaybeT (TCMT IO) Interface -> TCMT IO (Maybe Interface)
forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT (ByteString -> MaybeT (TCMT IO) Interface
decodeInterface (ByteString -> MaybeT (TCMT IO) Interface)
-> MaybeT (TCMT IO) ByteString -> MaybeT (TCMT IO) Interface
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< IO ByteString -> MaybeT (TCMT IO) ByteString
forall a. IO a -> MaybeT (TCMT IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (String -> IO ByteString
B.readFile String
f))

deserializeHashes :: ByteString -> IO (Maybe (Hash, Hash))
deserializeHashes :: ByteString -> IO (Maybe (Hash, Hash))
deserializeHashes ByteString
bstr =
  ((Hash, Hash) -> Maybe (Hash, Hash)
forall a. a -> Maybe a
Just ((Hash, Hash) -> Maybe (Hash, Hash))
-> IO (Hash, Hash) -> IO (Maybe (Hash, Hash))
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> ByteString -> IO (Hash, Hash)
forall a. Serialize a => ByteString -> IO a
deserialize ByteString
bstr)
  IO (Maybe (Hash, Hash))
-> (ErrorCall -> IO (Maybe (Hash, Hash)))
-> IO (Maybe (Hash, Hash))
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`E.catch` \(E.ErrorCall String
_) -> Maybe (Hash, Hash) -> IO (Maybe (Hash, Hash))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe (Hash, Hash)
forall a. Maybe a
Nothing