From 56387f0306f059f84faaf867a3fe044e908d3417 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 1 Sep 2024 02:30:21 +0530 Subject: [PATCH 1/5] Benchmarks for affected functions --- containers-tests/benchmarks/Map.hs | 23 ++++++++++++++++++++++- containers-tests/benchmarks/Set.hs | 8 ++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/containers-tests/benchmarks/Map.hs b/containers-tests/benchmarks/Map.hs index 8a67b6c6a..f3eff7ba4 100644 --- a/containers-tests/benchmarks/Map.hs +++ b/containers-tests/benchmarks/Map.hs @@ -88,7 +88,24 @@ main = do , bench "intersection" $ whnf (M.intersection m) m_even , bench "split" $ whnf (M.split (bound `div` 2)) m , bench "fromList" $ whnf M.fromList elems - , bench "fromList-desc" $ whnf M.fromList elems_desc + , bench "fromList-distinctAsc" $ whnf M.fromList elems_distinct_asc + , bench "fromList-distinctAsc:fusion" $ + whnf (\n -> M.fromList [(i,i) | i <- [1..n]]) bound + , bench "fromList-distinctDesc" $ whnf M.fromList elems_distinct_desc + , bench "fromList-distinctDesc:fusion" $ + whnf (\n -> M.fromList [(i,i) | i <- [n,n-1..1]]) bound + , bench "fromListWith-asc" $ whnf (M.fromListWith (+)) elems_asc + , bench "fromListWith-asc:fusion" $ + whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [1..n]]) bound + , bench "fromListWith-desc" $ whnf (M.fromListWith (+)) elems_desc + , bench "fromListWith-desc:fusion" $ + whnf (\n -> M.fromListWith (+) [(i `div` 2, i) | i <- [n,n-1..1]]) bound + , bench "fromListWithKey-asc" $ whnf (M.fromListWithKey sumkv) elems_asc + , bench "fromListWithKey-asc:fusion" $ + whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [1..n]]) bound + , bench "fromListWithKey-desc" $ whnf (M.fromListWithKey sumkv) elems_desc + , bench "fromListWithKey-desc:fusion" $ + whnf (\n -> M.fromListWithKey sumkv [(i `div` 2, i) | i <- [n,n-1..1]]) bound , bench "fromAscList" $ whnf M.fromAscList elems_asc , bench "fromAscList:fusion" $ whnf (\n -> M.fromAscList [(i `div` 2, i) | i <- [1..n]]) bound @@ -113,6 +130,10 @@ main = do , bgroup "folds" $ foldBenchmarks M.foldr M.foldl M.foldr' M.foldl' foldMap m , bgroup "folds with key" $ foldWithKeyBenchmarks M.foldrWithKey M.foldlWithKey M.foldrWithKey' M.foldlWithKey' M.foldMapWithKey m + , bench "mapKeys:asc" $ whnf (M.mapKeys (+1)) m + , bench "mapKeys:desc" $ whnf (M.mapKeys (negate . (+1))) m + , bench "mapKeysWith:asc" $ whnf (M.mapKeysWith (+) (`div` 2)) m + , bench "mapKeysWith:desc" $ whnf (M.mapKeysWith (+) (negate . (`div` 2))) m ] where bound = 2^12 diff --git a/containers-tests/benchmarks/Set.hs b/containers-tests/benchmarks/Set.hs index f49da1133..9e367391b 100644 --- a/containers-tests/benchmarks/Set.hs +++ b/containers-tests/benchmarks/Set.hs @@ -22,7 +22,8 @@ main = do defaultMain [ bench "member" $ whnf (member elems) s , bench "insert" $ whnf (ins elems) S.empty - , bench "map" $ whnf (S.map (+ 1)) s + , bench "map:asc" $ whnf (S.map (+ 1)) s + , bench "map:desc" $ whnf (S.map (negate . (+ 1))) s , bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s , bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s , bench "delete" $ whnf (del elems) s @@ -35,7 +36,10 @@ main = do , bench "difference" $ whnf (S.difference s) s_even , bench "intersection" $ whnf (S.intersection s) s_even , bench "fromList" $ whnf S.fromList elems - , bench "fromList-desc" $ whnf S.fromList elems_desc + , bench "fromList-distinctAsc" $ whnf S.fromList elems_distinct_asc + , bench "fromList-distinctAsc:fusion" $ whnf (\n -> S.fromList [1..n]) bound + , bench "fromList-distinctDesc" $ whnf S.fromList elems_distinct_desc + , bench "fromList-distinctDesc:fusion" $ whnf (\n -> S.fromList [n,n-1..1]) bound , bench "fromAscList" $ whnf S.fromAscList elems_asc , bench "fromAscList:fusion" $ whnf (\n -> S.fromAscList [i `div` 2 | i <- [1..n]]) bound From d364cff3c89fa8b691fe9c32dffa047788ec9ef7 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 1 Sep 2024 21:29:30 +0530 Subject: [PATCH 2/5] Property tests for affected functions --- containers-tests/tests/map-properties.hs | 29 +++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/containers-tests/tests/map-properties.hs b/containers-tests/tests/map-properties.hs index 8614b9d42..82cbf9339 100644 --- a/containers-tests/tests/map-properties.hs +++ b/containers-tests/tests/map-properties.hs @@ -201,6 +201,8 @@ main = defaultMain $ testGroup "map-properties" , testProperty "toDescList" prop_descList , testProperty "toAscList+toDescList" prop_ascDescList , testProperty "fromList" prop_fromList + , testProperty "fromListWith" prop_fromListWith + , testProperty "fromListWithKey" prop_fromListWithKey , testProperty "alter" prop_alter , testProperty "alterF/alter" prop_alterF_alter , testProperty "alterF/alter/noRULES" prop_alterF_alter_noRULES @@ -229,7 +231,8 @@ main = defaultMain $ testGroup "map-properties" , testProperty "partition" prop_partition , testProperty "map" prop_map , testProperty "fmap" prop_fmap - , testProperty "mapkeys" prop_mapkeys + , testProperty "mapKeys" prop_mapKeys + , testProperty "mapKeysWith" prop_mapKeysWith , testProperty "split" prop_splitModel , testProperty "fold" prop_fold , testProperty "foldMap" prop_foldMap @@ -1338,6 +1341,16 @@ prop_fromDistinctAscList kxs = List.sortBy (comparing fst) kxs t = fromDistinctAscList nubSortedKxs +prop_fromListWith :: Fun (A, A) A -> [(Int, A)] -> Property +prop_fromListWith f kxs = + fromListWith (applyFun2 f) kxs === + List.foldl' (\m (kx, x) -> insertWith (applyFun2 f) kx x m) empty kxs + +prop_fromListWithKey :: Fun (Int, A, A) A -> [(Int, A)] -> Property +prop_fromListWithKey f kxs = + fromListWithKey (applyFun3 f) kxs === + List.foldl' (\m (kx, x) -> insertWithKey (applyFun3 f) kx x m) empty kxs + ---------------------------------------------------------------- prop_alter :: UMap -> Int -> Bool @@ -1543,11 +1556,15 @@ prop_fmap f ys = length ys > 0 ==> m = fromList xs in fmap (apply f) m == fromList [ (a, (apply f) b) | (a,b) <- xs ] -prop_mapkeys :: Fun Int Int -> [(Int, Int)] -> Property -prop_mapkeys f ys = length ys > 0 ==> - let xs = List.nubBy ((==) `on` fst) ys - m = fromList xs - in mapKeys (apply f) m == (fromList $ List.nubBy ((==) `on` fst) $ reverse [ (apply f a, b) | (a,b) <- sort xs]) +prop_mapKeys :: Fun Int Int -> Map Int A -> Property +prop_mapKeys f m = + mapKeys (applyFun f) m === + fromList (fmap (\(kx,x) -> (applyFun f kx, x)) (toList m)) + +prop_mapKeysWith :: Fun (A, A) A -> Fun Int Int -> Map Int A -> Property +prop_mapKeysWith f g m = + mapKeysWith (applyFun2 f) (applyFun g) m === + fromListWith (applyFun2 f) (fmap (\(kx,x) -> (applyFun g kx, x)) (toList m)) prop_splitModel :: Int -> [(Int, Int)] -> Property prop_splitModel n ys = length ys > 0 ==> From 1e552395e763da075e89112363e7735c13d5443b Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 1 Sep 2024 14:04:51 +0530 Subject: [PATCH 3/5] Build Set and Map more consistently and efficiently Use "Builder"s to implement some Set and Map construction functions. As a result, some have become good consumers in terms of list fusion, and all are now O(n) for non-decreasing input. Fusible Fusible O(n) for O(n) for before after before after Set.fromList No Yes Strict incr Non-decr Set.map - - Strict incr Non-decr Map.fromList No Yes Strict incr Non-decr Map.fromListWith Yes Yes Never Non-decr Map.fromListWithKey Yes Yes Never Non-decr Map.mapKeys - - Strict incr Non-decr Map.mapKeysWith - - Never Non-decr --- containers/src/Data/Map/Internal.hs | 130 ++++++++++++--------- containers/src/Data/Map/Strict/Internal.hs | 91 ++++++--------- containers/src/Data/Set/Internal.hs | 99 +++++++++------- 3 files changed, 168 insertions(+), 152 deletions(-) diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index b72948317..61922700f 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -367,6 +367,10 @@ module Data.Map.Internal ( , Identity(..) , Stack(..) , foldl'Stack + , MapBuilder(..) + , emptyB + , insertB + , finishB -- Used by Map.Merge.Lazy , mapWhenMissing @@ -388,7 +392,6 @@ import Data.Semigroup (Semigroup((<>))) #endif import Control.Applicative (Const (..)) import Control.DeepSeq (NFData(rnf),NFData1(liftRnf),NFData2(liftRnf2)) -import Data.Bits (shiftL, shiftR) import qualified Data.Foldable as Foldable import Data.Bifoldable import Utils.Containers.Internal.Prelude hiding @@ -3247,6 +3250,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- | \(O(n \log n)\). -- @'mapKeys' f s@ is the map obtained by applying @f@ to each key of @s@. -- +-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time. +-- -- The size of the result may be smaller if @f@ maps two or more distinct -- keys to the same new key. In this case the value at the greatest of the -- original keys is retained. @@ -3256,7 +3261,7 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c" mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a -mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeys f m = finishB (foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB m) #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeys #-} #endif @@ -3264,6 +3269,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] -- | \(O(n \log n)\). -- @'mapKeysWith' c f s@ is the map obtained by applying @f@ to each key of @s@. -- +-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time. +-- -- The size of the result may be smaller if @f@ maps two or more distinct -- keys to the same new key. In this case the associated values will be -- combined using @c@. The value at the greater of the two original keys @@ -3275,7 +3282,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] -- Also see the performance note on 'fromListWith'. mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a -mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeysWith c f m = + finishB (foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB m) #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeysWith #-} #endif @@ -3520,55 +3528,20 @@ instance (Ord k) => GHCExts.IsList (Map k v) where -- If the list contains more than one value for the same key, the last value -- for the key is retained. -- --- If the keys of the list are ordered, a linear-time implementation is used. +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. -- -- > fromList [] == empty -- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")] -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. fromList :: Ord k => [(k,a)] -> Map k a -fromList [] = Tip -fromList [(kx, x)] = Bin 1 kx x Tip Tip -fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0 - | otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered kx ((ky,_) : _) = kx >= ky - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t (k,x) = insert k x t - - go !_ t [] = t - go _ t [(kx, x)] = insertMax kx x t - go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys - (r, _, ys) -> fromList' (link kx x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(xp : xss) - | s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss) - | otherwise -> (Bin 1 kx x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [(ky, y)], zs) -> (insertMax ky y l, [], zs) - (l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> (link ky y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. +-- -- > fromListWith (++) [(5,"a"), (5,"b"), (3,"x"), (5,"c")] == fromList [(3, "x"), (5, "cba")] -- > fromListWith (++) [] == empty -- @@ -3604,14 +3577,14 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T -- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a -fromListWith f xs - = fromListWithKey (\_ x y -> f x y) xs -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWith #-} -#endif +fromListWith f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs) +{-# INLINE fromListWith #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'. -- +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. +-- -- > let f key new_value old_value = show key ++ ":" ++ new_value ++ "|" ++ old_value -- > fromListWithKey f [(5,"a"), (5,"b"), (3,"b"), (3,"a"), (5,"c")] == fromList [(3, "3:a|b"), (5, "5:c|5:b|a")] -- > fromListWithKey f [] == empty @@ -3619,13 +3592,9 @@ fromListWith f xs -- Also see the performance note on 'fromListWith'. fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWithKey #-} -#endif +fromListWithKey f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs) +{-# INLINE fromListWithKey #-} -- INLINE for fusion -- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion. -- @@ -3971,6 +3940,57 @@ splitMember k0 m = case go k0 m of data StrictTriple a b c = StrictTriple !a !b !c +{-------------------------------------------------------------------- + MapBuilder +--------------------------------------------------------------------} + +-- See Note [SetBuilder] in Data.Set.Internal + +data MapBuilder k a + = BAsc !(Stack k a) + | BMap !(Map k a) + +-- Empty builder. +emptyB :: MapBuilder k a +emptyB = BAsc Nada + +-- Insert a key and value. Replaces the old value if one already exists for +-- the key. +insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a +insertB !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insert ky y (ascLinkAll stk)) + EQ -> BAsc (Push ky y l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (Push ky y Tip stk) + Nada -> BAsc (Push ky y Tip Nada) + BMap m -> BMap (insert ky y m) +{-# INLINE insertB #-} + +-- Insert a key and value. The new value is combined with the old value if one +-- already exists for the key. +insertWithB + :: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a +insertWithB f !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insertWith f ky y (ascLinkAll stk)) + EQ -> BAsc (Push ky (f y x) l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (Push ky y Tip stk) + Nada -> BAsc (Push ky y Tip Nada) + BMap m -> BMap (insertWith f ky y m) +{-# INLINE insertWithB #-} + +-- Finalize the builder into a Map. +finishB :: MapBuilder k a -> Map k a +finishB (BAsc stk) = ascLinkAll stk +finishB (BMap m) = m +{-# INLINABLE finishB #-} + {-------------------------------------------------------------------- Utility functions that maintain the balance properties of the tree. All constructors assume that all values in [l] < [k] and all values diff --git a/containers/src/Data/Map/Strict/Internal.hs b/containers/src/Data/Map/Strict/Internal.hs index 9f731b38d..ee43926c4 100644 --- a/containers/src/Data/Map/Strict/Internal.hs +++ b/containers/src/Data/Map/Strict/Internal.hs @@ -333,6 +333,10 @@ import Data.Map.Internal , descLinkTop , descLinkAll , Stack (..) + , MapBuilder(..) + , emptyB + , insertB + , finishB , (!) , (!?) , (\\) @@ -375,7 +379,6 @@ import Data.Map.Internal , foldrWithKey , foldrWithKey' , glue - , insertMax , intersection , isProperSubmapOf , isProperSubmapOfBy @@ -430,7 +433,6 @@ import qualified Data.Set.Internal as Set import qualified Data.Map.Internal as L import Utils.Containers.Internal.StrictPair -import Data.Bits (shiftL, shiftR) #ifdef __GLASGOW_HASKELL__ import Data.Coerce #endif @@ -1448,7 +1450,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- Also see the performance note on 'fromListWith'. mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a -mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeysWith c f m = + finishB (foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB m) #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeysWith #-} #endif @@ -1489,46 +1492,10 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA -- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")] -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. fromList :: Ord k => [(k,a)] -> Map k a -fromList [] = Tip -fromList [(kx, x)] = x `seq` Bin 1 kx x Tip Tip -fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 x0 Tip Tip) xs0 - | otherwise = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered kx ((ky,_) : _) = kx >= ky - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t (k,x) = insert k x t - - go !_ t [] = t - go _ t [(kx, x)] = x `seq` insertMax kx x t - go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> x `seq` go (s `shiftL` 1) (link kx x l r) ys - (r, _, ys) -> x `seq` fromList' (link kx x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(xp : xss) - | s == 1 = case xp of (kx, x) | not_ordered kx xss -> x `seq` (Bin 1 kx x Tip Tip, [], xss) - | otherwise -> x `seq` (Bin 1 kx x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [(ky, y)], zs) -> y `seq` (insertMax ky y l, [], zs) - (l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> y `seq` (link ky y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = + finishB (Foldable.foldl' (\b (kx, !x) -> insertB kx x b) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -1567,11 +1534,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 -- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a -fromListWith f xs - = fromListWithKey (\_ x y -> f x y) xs -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWith #-} -#endif +fromListWith f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs) +{-# INLINE fromListWith #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'. -- @@ -1582,13 +1547,9 @@ fromListWith f xs -- Also see the performance note on 'fromListWith'. fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWithKey #-} -#endif +fromListWithKey f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs) +{-# INLINE fromListWithKey #-} -- INLINE for fusion {-------------------------------------------------------------------- Building trees from ascending/descending lists can be done in linear time. @@ -1731,3 +1692,25 @@ fromDistinctDescList xs = descLinkAll (Foldable.foldl' next Nada xs) next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk next stk (!ky, !y) = Push ky y Tip stk {-# INLINE fromDistinctDescList #-} -- INLINE for fusion + +{-------------------------------------------------------------------- + MapBuilder +--------------------------------------------------------------------} + +-- Insert a key and value. The new value is combined with the old value if one +-- already exists for the key. Strict in the inserted value. +insertWithB + :: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a +insertWithB f !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insertWith f ky y (ascLinkAll stk)) + EQ -> BAsc (push' ky (f y x) l stk') + GT -> case l of + Tip -> y `seq` BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (push' ky y Tip stk) + Nada -> BAsc (push' ky y Tip Nada) + BMap m -> BMap (insertWith f ky y m) + where + push' kx !x = Push kx x +{-# INLINE insertWithB #-} diff --git a/containers/src/Data/Set/Internal.hs b/containers/src/Data/Set/Internal.hs index 82c41496c..3c7ff0fc1 100644 --- a/containers/src/Data/Set/Internal.hs +++ b/containers/src/Data/Set/Internal.hs @@ -238,7 +238,6 @@ import Utils.Containers.Internal.Prelude hiding import Prelude () import Control.Applicative (Const(..)) import qualified Data.List as List -import Data.Bits (shiftL, shiftR) import Data.Semigroup (Semigroup(..), stimesIdempotentMonoid, stimesIdempotent) import Data.Functor.Classes import Data.Functor.Identity (Identity) @@ -1000,11 +999,13 @@ partition p0 t0 = toPair $ go p0 t0 -- | \(O(n \log n)\). -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@. -- +-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time. +-- -- It's worth noting that the size of the result may be smaller if, -- for some @(x,y)@, @x \/= y && f x == f y@ map :: Ord b => (a->b) -> Set a -> Set b -map f = fromList . List.map f . toList +map f t = finishB (foldl' (\b x -> insertB (f x) b) emptyB t) #if __GLASGOW_HASKELL__ {-# INLINABLE map #-} #endif @@ -1146,48 +1147,11 @@ foldlFB = foldl -- | \(O(n \log n)\). Create a set from a list of elements. -- --- If the elements are ordered, a linear-time implementation is used. - --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. +-- If the elements are in non-decreasing order, this function takes \(O(n)\) +-- time. fromList :: Ord a => [a] -> Set a -fromList [] = Tip -fromList [x] = Bin 1 x Tip Tip -fromList (x0 : xs0) | not_ordered x0 xs0 = fromList' (Bin 1 x0 Tip Tip) xs0 - | otherwise = go (1::Int) (Bin 1 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered x (y : _) = x >= y - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t x = insert x t - - go !_ t [] = t - go _ t [x] = insertMax x t - go s l xs@(x : xss) | not_ordered x xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> go (s `shiftL` 1) (link x l r) ys - (r, _, ys) -> fromList' (link x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(x : xss) - | s == 1 = if not_ordered x xss then (Bin 1 x Tip Tip, [], xss) - else (Bin 1 x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [y], zs) -> (insertMax y l, [], zs) - (l, ys@(y:yss), _) | not_ordered y yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> (link y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = finishB (Foldable.foldl' (flip insertB) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion {-------------------------------------------------------------------- Building trees from ascending/descending lists can be done in linear time. @@ -1665,6 +1629,55 @@ spanAntitone p0 m = toPair (go p0 m) | p x = let u :*: v = go p r in link x l u :*: v | otherwise = let u :*: v = go p l in u :*: link x v r +{-------------------------------------------------------------------- + SetBuilder +--------------------------------------------------------------------} + +-- Note [SetBuilder] +-- ~~~~~~~~~~~~~~~~~ +-- SetBuilder serves as an accumulator for element-by-element construction of +-- a Set. It can be used in folds to construct sets. This plays nicely with list +-- fusion if the structure folded over is a list, as in fromList and friends. +-- +-- As long as the elements are in non-decreasing order, insertB accumulates them +-- in a Stack, just as fromDistinctAscList does. On encountering an element out +-- of order, it builds a Set from the Stack and switches to using insert for all +-- future elements. This gives us construction in O(n) if the elements are +-- already sorted. If not, the worst case remains O(n log n). +-- +-- More complicated implementations are possible, such as repeatedly +-- accumulating runs of increasing elements in Stacks (not just once) and +-- union-ing with an accumulated Set, but this makes the worst case somewhat +-- slower (~10%). + +data SetBuilder a + = BAsc !(Stack a) + | BSet !(Set a) + +-- Empty builder. +emptyB :: SetBuilder a +emptyB = BAsc Nada + +-- Insert an element. Replaces the old element if an equal element already +-- exists. +insertB :: Ord a => a -> SetBuilder a -> SetBuilder a +insertB !y b = case b of + BAsc stk -> case stk of + Push x l stk' -> case compare y x of + LT -> BSet (insert y (ascLinkAll stk)) + EQ -> BAsc (Push y l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton x) y) + Bin{} -> BAsc (Push y Tip stk) + Nada -> BAsc (Push y Tip Nada) + BSet m -> BSet (insert y m) +{-# INLINE insertB #-} + +-- Finalize the builder into a Set. +finishB :: SetBuilder a -> Set a +finishB (BAsc stk) = ascLinkAll stk +finishB (BSet s) = s +{-# INLINABLE finishB #-} {-------------------------------------------------------------------- Utility functions that maintain the balance properties of the tree. From e8e81ac69bb63a6aa47856a7d3ec8a04f88ca79c Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 5 Jan 2025 19:12:52 +0530 Subject: [PATCH 4/5] Update changelog --- containers/changelog.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/containers/changelog.md b/containers/changelog.md index 150e03d71..bb5db1675 100644 --- a/containers/changelog.md +++ b/containers/changelog.md @@ -63,6 +63,9 @@ * Improved performance for `Data.Set` and `Data.Map`'s `fromAscList*` and `fromDescList*` functions. +* Improved performance for `Data.Set`'s `fromList`, `map` and `Data.Map`'s + `fromList`, `fromListWith`, `fromListWithKey`, `mapKeys`, `mapKeysWith`. + ## Unreleased with `@since` annotation for 0.7.1: ### Additions From b8f601663a74e1c3edbf1f018830bd999027eaa5 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 5 Jan 2025 22:27:53 +0530 Subject: [PATCH 5/5] Missed strict Map docs --- containers/src/Data/Map/Strict/Internal.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/containers/src/Data/Map/Strict/Internal.hs b/containers/src/Data/Map/Strict/Internal.hs index ee43926c4..8b056dc23 100644 --- a/containers/src/Data/Map/Strict/Internal.hs +++ b/containers/src/Data/Map/Strict/Internal.hs @@ -1439,6 +1439,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- | \(O(n \log n)\). -- @'mapKeysWith' c f s@ is the map obtained by applying @f@ to each key of @s@. -- +-- If `f` is monotonically non-decreasing, this function takes \(O(n)\) time. +-- -- The size of the result may be smaller if @f@ maps two or more distinct -- keys to the same new key. In this case the associated values will be -- combined using @c@. The value at the greater of the two original keys @@ -1486,7 +1488,7 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA -- If the list contains more than one value for the same key, the last value -- for the key is retained. -- --- If the keys of the list are ordered, a linear-time implementation is used. +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. -- -- > fromList [] == empty -- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")] @@ -1499,6 +1501,8 @@ fromList xs = -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. +-- -- > fromListWith (++) [(5,"a"), (5,"b"), (3,"x"), (5,"c")] == fromList [(3, "x"), (5, "cba")] -- > fromListWith (++) [] == empty -- @@ -1540,6 +1544,8 @@ fromListWith f xs = -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'. -- +-- If the keys are in non-decreasing order, this function takes \(O(n)\) time. +-- -- > let f key new_value old_value = show key ++ ":" ++ new_value ++ "|" ++ old_value -- > fromListWithKey f [(5,"a"), (5,"b"), (3,"b"), (3,"a"), (5,"c")] == fromList [(3, "3:a|b"), (5, "5:c|5:b|a")] -- > fromListWithKey f [] == empty