{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_HADDOCK hide #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2009, 2012, 2016 Duncan Coutts
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Pack (
    pack,
    packAndCheck,
    packFileEntry,
    packDirectoryEntry,
    packSymlinkEntry,
    longLinkEntry,

    getDirectoryContentsRecursive,
  ) where

import Codec.Archive.Tar.LongNames
import Codec.Archive.Tar.Types
import Control.Monad (join, when, forM, (>=>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Foldable
import System.FilePath
         ( (</>) )
import qualified System.FilePath as FilePath.Native
         ( addTrailingPathSeparator, hasTrailingPathSeparator, splitDirectories )
import System.Directory
         ( listDirectory, doesDirectoryExist, getModificationTime
         , pathIsSymbolicLink, getSymbolicLinkTarget
         , Permissions(..), getPermissions, getFileSize )
import Data.Time.Clock
         ( UTCTime )
import Data.Time.Clock.POSIX
         ( utcTimeToPOSIXSeconds )
import System.IO
         ( IOMode(ReadMode), openBinaryFile, hFileSize )
import System.IO.Unsafe (unsafeInterleaveIO)
import Control.Exception (throwIO, SomeException)
import Codec.Archive.Tar.Check.Internal (checkEntrySecurity)

-- | Creates a tar archive from a list of directory or files. Any directories
-- specified will have their contents included recursively. Paths in the
-- archive will be relative to the given base directory.
--
-- This is a portable implementation of packing suitable for portable archives.
-- In particular it only constructs 'NormalFile', 'Directory' and 'SymbolicLink'
-- entries. Hard links are treated like ordinary files. Special files like
-- FIFOs (named pipes), sockets or device files will cause problems.
--
-- * This function returns results lazily. Subdirectories are scanned
-- and files are read one by one as the list of entries is consumed.
-- Do not change their contents before the output of 'Codec.Archive.Tar.pack' was consumed in full.
--
pack
  :: FilePath   -- ^ Base directory
  -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
  -> IO [Entry]
pack :: FilePath -> [FilePath] -> IO [Entry]
pack = (GenEntry FilePath FilePath -> Maybe SomeException)
-> FilePath -> [FilePath] -> IO [Entry]
packAndCheck (Maybe SomeException
-> GenEntry FilePath FilePath -> Maybe SomeException
forall a b. a -> b -> a
const Maybe SomeException
forall a. Maybe a
Nothing)

-- | Like 'Codec.Archive.Tar.pack', but allows to specify additional sanity/security
-- checks on the input filenames. This is useful if you know which
-- check will be used on client side
-- in 'Codec.Archive.Tar.unpack' / 'Codec.Archive.Tar.unpackAndCheck'.
--
-- @since 0.6.0.0
packAndCheck
  :: (GenEntry FilePath FilePath -> Maybe SomeException)
  -> FilePath   -- ^ Base directory
  -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
  -> IO [Entry]
packAndCheck :: (GenEntry FilePath FilePath -> Maybe SomeException)
-> FilePath -> [FilePath] -> IO [Entry]
packAndCheck GenEntry FilePath FilePath -> Maybe SomeException
secCB FilePath
baseDir [FilePath]
relpaths = do
  paths <- FilePath -> [FilePath] -> IO [FilePath]
preparePaths FilePath
baseDir [FilePath]
relpaths
  entries <- packPaths baseDir paths
  traverse_ (maybe (pure ()) throwIO . secCB) entries
  pure $ concatMap encodeLongNames entries

preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
preparePaths FilePath
baseDir = ([[FilePath]] -> [FilePath]) -> IO [[FilePath]] -> IO [FilePath]
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[FilePath]] -> [FilePath]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (IO [[FilePath]] -> IO [FilePath])
-> ([FilePath] -> IO [[FilePath]]) -> [FilePath] -> IO [FilePath]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IO [FilePath]] -> IO [[FilePath]]
forall a. [IO a] -> IO [a]
interleave ([IO [FilePath]] -> IO [[FilePath]])
-> ([FilePath] -> [IO [FilePath]]) -> [FilePath] -> IO [[FilePath]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FilePath -> IO [FilePath]) -> [FilePath] -> [IO [FilePath]]
forall a b. (a -> b) -> [a] -> [b]
map FilePath -> IO [FilePath]
go
  where
    go :: FilePath -> IO [FilePath]
go FilePath
relpath = do
      let abspath :: FilePath
abspath = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath
relpath
      isDir  <- FilePath -> IO Bool
doesDirectoryExist FilePath
abspath
      isSymlink <- pathIsSymbolicLink abspath
      if isDir && not isSymlink then do
        entries <- getDirectoryContentsRecursive abspath
        let entries' = (FilePath -> FilePath) -> [FilePath] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map (FilePath
relpath FilePath -> FilePath -> FilePath
</>) [FilePath]
entries
        return $ if null relpath
          then entries'
          else FilePath.Native.addTrailingPathSeparator relpath : entries'
      else return [relpath]

-- | Pack paths while accounting for overlong filepaths.
packPaths
  :: FilePath
  -> [FilePath]
  -> IO [GenEntry FilePath FilePath]
packPaths :: FilePath -> [FilePath] -> IO [GenEntry FilePath FilePath]
packPaths FilePath
baseDir [FilePath]
paths = [IO (GenEntry FilePath FilePath)]
-> IO [GenEntry FilePath FilePath]
forall a. [IO a] -> IO [a]
interleave ([IO (GenEntry FilePath FilePath)]
 -> IO [GenEntry FilePath FilePath])
-> [IO (GenEntry FilePath FilePath)]
-> IO [GenEntry FilePath FilePath]
forall a b. (a -> b) -> a -> b
$ ((FilePath -> IO (GenEntry FilePath FilePath))
 -> [FilePath] -> [IO (GenEntry FilePath FilePath)])
-> [FilePath]
-> (FilePath -> IO (GenEntry FilePath FilePath))
-> [IO (GenEntry FilePath FilePath)]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (FilePath -> IO (GenEntry FilePath FilePath))
-> [FilePath] -> [IO (GenEntry FilePath FilePath)]
forall a b. (a -> b) -> [a] -> [b]
map [FilePath]
paths ((FilePath -> IO (GenEntry FilePath FilePath))
 -> [IO (GenEntry FilePath FilePath)])
-> (FilePath -> IO (GenEntry FilePath FilePath))
-> [IO (GenEntry FilePath FilePath)]
forall a b. (a -> b) -> a -> b
$ \FilePath
relpath -> do
  let isDir :: Bool
isDir = FilePath -> Bool
FilePath.Native.hasTrailingPathSeparator FilePath
abspath
      abspath :: FilePath
abspath = FilePath
baseDir FilePath -> FilePath -> FilePath
</> FilePath
relpath
  isSymlink <- FilePath -> IO Bool
pathIsSymbolicLink FilePath
abspath
  let mkEntry
        | Bool
isSymlink = FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
forall tarPath.
FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
packSymlinkEntry
        | Bool
isDir = FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry
        | Bool
otherwise = FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry
  mkEntry abspath relpath

interleave :: [IO a] -> IO [a]
interleave :: forall a. [IO a] -> IO [a]
interleave = IO [a] -> IO [a]
forall a. IO a -> IO a
unsafeInterleaveIO (IO [a] -> IO [a]) -> ([IO a] -> IO [a]) -> [IO a] -> IO [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IO a] -> IO [a]
forall a. [IO a] -> IO [a]
go
  where
    go :: [IO a] -> IO [a]
go []     = [a] -> IO [a]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return []
    go (IO a
x:[IO a]
xs) = do
      x'  <- IO a
x
      xs' <- interleave xs
      return (x':xs')

-- | Construct a tar entry based on a local file.
--
-- This sets the entry size, the data contained in the file and the file's
-- modification time. If the file is executable then that information is also
-- preserved. File ownership and detailed permissions are not preserved.
--
-- * The file contents is read lazily.
--
packFileEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packFileEntry :: forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry FilePath
filepath tarPath
tarpath = do
  mtime   <- FilePath -> IO EpochTime
getModTime FilePath
filepath
  perms   <- getPermissions filepath
  -- Get file size without opening it.
  approxSize <- getFileSize filepath

  (content, size) <- if approxSize < 131072
    -- If file is short enough, just read it strictly
    -- so that no file handle dangles around indefinitely.
    then do
      cnt <- B.readFile filepath
      pure (BL.fromStrict cnt, fromIntegral $ B.length cnt)
    else do
      hndl <- openBinaryFile filepath ReadMode
      -- File size could have changed between measuring approxSize
      -- and here. Measuring again.
      sz <- hFileSize hndl
      -- Lazy I/O at its best: once cnt is forced in full,
      -- BL.hGetContents will close the handle.
      cnt <- BL.hGetContents hndl
      -- It would be wrong to return (cnt, BL.length sz):
      -- NormalFile constructor below forces size which in turn
      -- allocates entire cnt in memory at once.
      pure (cnt, fromInteger sz)

  pure (simpleEntry tarpath (NormalFile content size))
    { entryPermissions =
      if executable perms then executableFilePermissions else ordinaryFilePermissions
    , entryTime = mtime
    }

-- | Construct a tar entry based on a local directory (but not its contents).
--
-- The only attribute of the directory that is used is its modification time.
-- Directory ownership and detailed permissions are not preserved.
--
packDirectoryEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry :: forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry FilePath
filepath tarPath
tarpath = do
  mtime   <- FilePath -> IO EpochTime
getModTime FilePath
filepath
  return (directoryEntry tarpath) {
    entryTime = mtime
  }

-- | Construct a tar entry based on a local symlink.
--
-- This automatically checks symlink safety via 'checkEntrySecurity'.
--
-- @since 0.6.0.0
packSymlinkEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath FilePath)
packSymlinkEntry :: forall tarPath.
FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
packSymlinkEntry FilePath
filepath tarPath
tarpath = do
  linkTarget <- FilePath -> IO FilePath
getSymbolicLinkTarget FilePath
filepath
  pure $ symlinkEntry tarpath linkTarget

-- | This is a utility function, much like 'listDirectory'. The
-- difference is that it includes the contents of subdirectories.
--
-- The paths returned are all relative to the top directory. Directory paths
-- are distinguishable by having a trailing path separator
-- (see 'FilePath.Native.hasTrailingPathSeparator').
--
-- All directories are listed before the files that they contain. Amongst the
-- contents of a directory, subdirectories are listed after normal files. The
-- overall result is that files within a directory will be together in a single
-- contiguous group. This tends to improve file layout and IO performance when
-- creating or extracting tar archives.
--
-- * This function returns results lazily. Subdirectories are not scanned
-- until the files entries in the parent directory have been consumed.
-- If the source directory structure changes before the result is used in full,
-- the behaviour is undefined.
--
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive FilePath
dir0 =
  ([FilePath] -> [FilePath]) -> IO [FilePath] -> IO [FilePath]
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> [FilePath] -> [FilePath]
forall a. Int -> [a] -> [a]
drop Int
1) (FilePath -> [FilePath] -> IO [FilePath]
recurseDirectories FilePath
dir0 [FilePath
""])

recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
recurseDirectories FilePath
_    []         = [FilePath] -> IO [FilePath]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return []
recurseDirectories FilePath
base (FilePath
dir:[FilePath]
dirs) = IO [FilePath] -> IO [FilePath]
forall a. IO a -> IO a
unsafeInterleaveIO (IO [FilePath] -> IO [FilePath]) -> IO [FilePath] -> IO [FilePath]
forall a b. (a -> b) -> a -> b
$ do
  (files, dirs') <- [FilePath]
-> [FilePath] -> [FilePath] -> IO ([FilePath], [FilePath])
collect [] [] ([FilePath] -> IO ([FilePath], [FilePath]))
-> IO [FilePath] -> IO ([FilePath], [FilePath])
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< FilePath -> IO [FilePath]
listDirectory (FilePath
base FilePath -> FilePath -> FilePath
</> FilePath
dir)

  files' <- recurseDirectories base (dirs' ++ dirs)
  return (dir : files ++ files')

  where
    collect :: [FilePath]
-> [FilePath] -> [FilePath] -> IO ([FilePath], [FilePath])
collect [FilePath]
files [FilePath]
dirs' []              = ([FilePath], [FilePath]) -> IO ([FilePath], [FilePath])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([FilePath] -> [FilePath]
forall a. [a] -> [a]
reverse [FilePath]
files, [FilePath] -> [FilePath]
forall a. [a] -> [a]
reverse [FilePath]
dirs')
    collect [FilePath]
files [FilePath]
dirs' (FilePath
entry:[FilePath]
entries) = do
      let dirEntry :: FilePath
dirEntry  = FilePath
dir FilePath -> FilePath -> FilePath
</> FilePath
entry
          dirEntry' :: FilePath
dirEntry' = FilePath -> FilePath
FilePath.Native.addTrailingPathSeparator FilePath
dirEntry
      isDirectory <- FilePath -> IO Bool
doesDirectoryExist (FilePath
base FilePath -> FilePath -> FilePath
</> FilePath
dirEntry)
      isSymlink <- pathIsSymbolicLink (base </> dirEntry)
      if isDirectory && not isSymlink
        then collect files (dirEntry':dirs') entries
        else collect (dirEntry:files) dirs' entries

getModTime :: FilePath -> IO EpochTime
getModTime :: FilePath -> IO EpochTime
getModTime FilePath
path = do
  -- The directory package switched to the new time package
  t <- FilePath -> IO UTCTime
getModificationTime FilePath
path
  return . floor . utcTimeToPOSIXSeconds $ t