Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build Set and Map more efficiently and consistently #1042

Merged
merged 5 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion containers-tests/benchmarks/Map.hs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,24 @@ main = do
, bench "intersection" $ whnf (M.intersection m) m_even
, bench "split" $ whnf (M.split (bound `div` 2)) m
, bench "fromList" $ whnf M.fromList elems
, bench "fromList-desc" $ whnf M.fromList elems_desc
, bench "fromList-distinctAsc" $ whnf M.fromList elems_distinct_asc
, bench "fromList-distinctAsc:fusion" $
whnf (\n -> M.fromList [(i,i) | i <- [1..n]]) bound
, bench "fromList-distinctDesc" $ whnf M.fromList elems_distinct_desc
, bench "fromList-distinctDesc:fusion" $
whnf (\n -> M.fromList [(i,i) | i <- [n,n-1..1]]) bound
, bench "fromListWith-asc" $ whnf (M.fromListWith (+)) elems_asc
, bench "fromListWith-asc:fusion" $
whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [1..n]]) bound
, bench "fromListWith-desc" $ whnf (M.fromListWith (+)) elems_desc
, bench "fromListWith-desc:fusion" $
whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [n,n-1..1]]) bound
, bench "fromListWithKey-asc" $ whnf (M.fromListWithKey sumkv) elems_asc
, bench "fromListWithKey-asc:fusion" $
whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [1..n]]) bound
, bench "fromListWithKey-desc" $ whnf (M.fromListWithKey sumkv) elems_desc
, bench "fromListWithKey-desc:fusion" $
whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [n,n-1..1]]) bound
, bench "fromAscList" $ whnf M.fromAscList elems_asc
, bench "fromAscList:fusion" $
whnf (\n -> M.fromAscList [(i `div` 2, i) | i <- [1..n]]) bound
Expand All @@ -113,6 +130,10 @@ main = do
, bgroup "folds" $ foldBenchmarks M.foldr M.foldl M.foldr' M.foldl' foldMap m
, bgroup "folds with key" $
foldWithKeyBenchmarks M.foldrWithKey M.foldlWithKey M.foldrWithKey' M.foldlWithKey' M.foldMapWithKey m
, bench "mapKeys:asc" $ whnf (M.mapKeys (+1)) m
, bench "mapKeys:desc" $ whnf (M.mapKeys (negate . (+1))) m
, bench "mapKeysWith:asc" $ whnf (M.mapKeysWith (+) (`div` 2)) m
, bench "mapKeysWith:desc" $ whnf (M.mapKeysWith (+) (negate . (`div` 2))) m
]
where
bound = 2^12
Expand Down
8 changes: 6 additions & 2 deletions containers-tests/benchmarks/Set.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ main = do
defaultMain
[ bench "member" $ whnf (member elems) s
, bench "insert" $ whnf (ins elems) S.empty
, bench "map" $ whnf (S.map (+ 1)) s
, bench "map:asc" $ whnf (S.map (+ 1)) s
, bench "map:desc" $ whnf (S.map (negate . (+ 1))) s
, bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s
, bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s
, bench "delete" $ whnf (del elems) s
Expand All @@ -35,7 +36,10 @@ main = do
, bench "difference" $ whnf (S.difference s) s_even
, bench "intersection" $ whnf (S.intersection s) s_even
, bench "fromList" $ whnf S.fromList elems
, bench "fromList-desc" $ whnf S.fromList elems_desc
, bench "fromList-distinctAsc" $ whnf S.fromList elems_distinct_asc
, bench "fromList-distinctAsc:fusion" $ whnf (\n -> S.fromList [1..n]) bound
, bench "fromList-distinctDesc" $ whnf S.fromList elems_distinct_desc
, bench "fromList-distinctDesc:fusion" $ whnf (\n -> S.fromList [n,n-1..1]) bound
, bench "fromAscList" $ whnf S.fromAscList elems_asc
, bench "fromAscList:fusion" $
whnf (\n -> S.fromAscList [i `div` 2 | i <- [1..n]]) bound
Expand Down
29 changes: 23 additions & 6 deletions containers-tests/tests/map-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ main = defaultMain $ testGroup "map-properties"
, testProperty "toDescList" prop_descList
, testProperty "toAscList+toDescList" prop_ascDescList
, testProperty "fromList" prop_fromList
, testProperty "fromListWith" prop_fromListWith
, testProperty "fromListWithKey" prop_fromListWithKey
, testProperty "alter" prop_alter
, testProperty "alterF/alter" prop_alterF_alter
, testProperty "alterF/alter/noRULES" prop_alterF_alter_noRULES
Expand Down Expand Up @@ -229,7 +231,8 @@ main = defaultMain $ testGroup "map-properties"
, testProperty "partition" prop_partition
, testProperty "map" prop_map
, testProperty "fmap" prop_fmap
, testProperty "mapkeys" prop_mapkeys
, testProperty "mapKeys" prop_mapKeys
, testProperty "mapKeysWith" prop_mapKeysWith
, testProperty "split" prop_splitModel
, testProperty "fold" prop_fold
, testProperty "foldMap" prop_foldMap
Expand Down Expand Up @@ -1338,6 +1341,16 @@ prop_fromDistinctAscList kxs =
List.sortBy (comparing fst) kxs
t = fromDistinctAscList nubSortedKxs

prop_fromListWith :: Fun (A, A) A -> [(Int, A)] -> Property
prop_fromListWith f kxs =
fromListWith (applyFun2 f) kxs ===
List.foldl' (\m (kx, x) -> insertWith (applyFun2 f) kx x m) empty kxs

prop_fromListWithKey :: Fun (Int, A, A) A -> [(Int, A)] -> Property
prop_fromListWithKey f kxs =
fromListWithKey (applyFun3 f) kxs ===
List.foldl' (\m (kx, x) -> insertWithKey (applyFun3 f) kx x m) empty kxs

----------------------------------------------------------------

prop_alter :: UMap -> Int -> Bool
Expand Down Expand Up @@ -1543,11 +1556,15 @@ prop_fmap f ys = length ys > 0 ==>
m = fromList xs
in fmap (apply f) m == fromList [ (a, (apply f) b) | (a,b) <- xs ]

prop_mapkeys :: Fun Int Int -> [(Int, Int)] -> Property
prop_mapkeys f ys = length ys > 0 ==>
let xs = List.nubBy ((==) `on` fst) ys
m = fromList xs
in mapKeys (apply f) m == (fromList $ List.nubBy ((==) `on` fst) $ reverse [ (apply f a, b) | (a,b) <- sort xs])
prop_mapKeys :: Fun Int Int -> Map Int A -> Property
prop_mapKeys f m =
mapKeys (applyFun f) m ===
fromList (fmap (\(kx,x) -> (applyFun f kx, x)) (toList m))

prop_mapKeysWith :: Fun (A, A) A -> Fun Int Int -> Map Int A -> Property
prop_mapKeysWith f g m =
mapKeysWith (applyFun2 f) (applyFun g) m ===
fromListWith (applyFun2 f) (fmap (\(kx,x) -> (applyFun g kx, x)) (toList m))

prop_splitModel :: Int -> [(Int, Int)] -> Property
prop_splitModel n ys = length ys > 0 ==>
Expand Down
3 changes: 3 additions & 0 deletions containers/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
* Improved performance for `Data.Set` and `Data.Map`'s `fromAscList*` and
`fromDescList*` functions.

* Improved performance for `Data.Set`'s `fromList`, `map` and `Data.Map`'s
`fromList`, `fromListWith`, `fromListWithKey`, `mapKeys`, `mapKeysWith`.

## Unreleased with `@since` annotation for 0.7.1:

### Additions
Expand Down
130 changes: 75 additions & 55 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,10 @@ module Data.Map.Internal (
, Identity(..)
, Stack(..)
, foldl'Stack
, MapBuilder(..)
, emptyB
, insertB
, finishB

-- Used by Map.Merge.Lazy
, mapWhenMissing
Expand All @@ -388,7 +392,6 @@ import Data.Semigroup (Semigroup((<>)))
#endif
import Control.Applicative (Const (..))
import Control.DeepSeq (NFData(rnf),NFData1(liftRnf),NFData2(liftRnf2))
import Data.Bits (shiftL, shiftR)
import qualified Data.Foldable as Foldable
import Data.Bifoldable
import Utils.Containers.Internal.Prelude hiding
Expand Down Expand Up @@ -3247,6 +3250,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- | \(O(n \log n)\).
-- @'mapKeys' f s@ is the map obtained by applying @f@ to each key of @s@.
--
-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time.
--
-- The size of the result may be smaller if @f@ maps two or more distinct
-- keys to the same new key. In this case the value at the greatest of the
-- original keys is retained.
Expand All @@ -3256,14 +3261,16 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c"

mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a
mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeys f m = finishB (foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB m)
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeys #-}
#endif

-- | \(O(n \log n)\).
-- @'mapKeysWith' c f s@ is the map obtained by applying @f@ to each key of @s@.
--
-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time.
--
-- The size of the result may be smaller if @f@ maps two or more distinct
-- keys to the same new key. In this case the associated values will be
-- combined using @c@. The value at the greater of the two original keys
Expand All @@ -3275,7 +3282,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
-- Also see the performance note on 'fromListWith'.

mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeysWith c f m =
finishB (foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB m)
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeysWith #-}
#endif
Expand Down Expand Up @@ -3520,55 +3528,20 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
-- If the list contains more than one value for the same key, the last value
-- for the key is retained.
--
-- If the keys of the list are ordered, a linear-time implementation is used.
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
--
-- > fromList [] == empty
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]

-- For some reason, when 'singleton' is used in fromList or in
-- create, it is not inlined, so we inline it manually.
fromList :: Ord k => [(k,a)] -> Map k a
fromList [] = Tip
fromList [(kx, x)] = Bin 1 kx x Tip Tip
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0
| otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
not_ordered _ [] = False
not_ordered kx ((ky,_) : _) = kx >= ky
{-# INLINE not_ordered #-}

fromList' t0 xs = Foldable.foldl' ins t0 xs
where ins t (k,x) = insert k x t

go !_ t [] = t
go _ t [(kx, x)] = insertMax kx x t
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
| otherwise = case create s xss of
(r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys
(r, _, ys) -> fromList' (link kx x l r) ys

-- The create is returning a triple (tree, xs, ys). Both xs and ys
-- represent not yet processed elements and only one of them can be nonempty.
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
-- and must be inserted using fromList'. Otherwise the keys have been
-- ordered so far.
create !_ [] = (Tip, [], [])
create s xs@(xp : xss)
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss)
| otherwise -> (Bin 1 kx x Tip Tip, xss, [])
| otherwise = case create (s `shiftR` 1) xs of
res@(_, [], _) -> res
(l, [(ky, y)], zs) -> (insertMax ky y l, [], zs)
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
| otherwise -> case create (s `shiftR` 1) yss of
(r, zs, ws) -> (link ky y l r, zs, ws)
#if __GLASGOW_HASKELL__
{-# INLINABLE fromList #-}
#endif
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
{-# INLINE fromList #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
--
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
--
-- > fromListWith (++) [(5,"a"), (5,"b"), (3,"x"), (5,"c")] == fromList [(3, "x"), (5, "cba")]
-- > fromListWith (++) [] == empty
--
Expand Down Expand Up @@ -3604,28 +3577,24 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith f xs
= fromListWithKey (\_ x y -> f x y) xs
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWith #-}
#endif
fromListWith f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
{-# INLINE fromListWith #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
--
-- If the keys are in non-decreasing order, this function takes \(O(n)\) time.
--
-- > let f key new_value old_value = show key ++ ":" ++ new_value ++ "|" ++ old_value
-- > fromListWithKey f [(5,"a"), (5,"b"), (3,"b"), (3,"a"), (5,"c")] == fromList [(3, "3:a|b"), (5, "5:c|5:b|a")]
-- > fromListWithKey f [] == empty
--
-- Also see the performance note on 'fromListWith'.

fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
fromListWithKey f xs
= Foldable.foldl' ins empty xs
where
ins t (k,x) = insertWithKey f k x t
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWithKey #-}
#endif
fromListWithKey f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
{-# INLINE fromListWithKey #-} -- INLINE for fusion

-- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion.
--
Expand Down Expand Up @@ -3971,6 +3940,57 @@ splitMember k0 m = case go k0 m of

data StrictTriple a b c = StrictTriple !a !b !c

{--------------------------------------------------------------------
MapBuilder
--------------------------------------------------------------------}

-- See Note [SetBuilder] in Data.Set.Internal

data MapBuilder k a
= BAsc !(Stack k a)
| BMap !(Map k a)

-- Empty builder.
emptyB :: MapBuilder k a
emptyB = BAsc Nada

-- Insert a key and value. Replaces the old value if one already exists for
-- the key.
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
insertB !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insert ky y (ascLinkAll stk))
EQ -> BAsc (Push ky y l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insert ky y m)
{-# INLINE insertB #-}

-- Insert a key and value. The new value is combined with the old value if one
-- already exists for the key.
insertWithB
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
insertWithB f !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insertWith f ky y (ascLinkAll stk))
EQ -> BAsc (Push ky (f y x) l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insertWith f ky y m)
{-# INLINE insertWithB #-}

-- Finalize the builder into a Map.
finishB :: MapBuilder k a -> Map k a
finishB (BAsc stk) = ascLinkAll stk
finishB (BMap m) = m
{-# INLINABLE finishB #-}

{--------------------------------------------------------------------
Utility functions that maintain the balance properties of the tree.
All constructors assume that all values in [l] < [k] and all values
Expand Down
Loading
Loading