Skip to content

Commit

Permalink
Improve HashSet API docs. (#267)
Browse files Browse the repository at this point in the history
- More information in introduction (basic operations, and using HashSet with custom
data types)
- Examples alongside function docs
  • Loading branch information
m-renaud authored Jul 20, 2020
1 parent 7485f5c commit afcbc77
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 30 deletions.
107 changes: 88 additions & 19 deletions Data/HashSet.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,94 @@
#endif

------------------------------------------------------------------------
-- |
-- Module : Data.HashSet
-- Copyright : 2011 Bryan O'Sullivan
-- License : BSD-style
-- Maintainer : [email protected]
-- Stability : provisional
-- Portability : portable
--
-- A set of /hashable/ values. A set cannot contain duplicate items.
-- A 'HashSet' makes no guarantees as to the order of its elements.
--
-- The implementation is based on /hash array mapped trie/. A
-- 'HashSet' is often faster than other tree-based set types,
-- especially when value comparison is expensive, as in the case of
-- strings.
--
-- Many operations have a average-case complexity of /O(log n)/. The
-- implementation uses a large base (i.e. 16) so in practice these
-- operations are constant time.
{-|
Module : Data.HashSet
Copyright : 2011 Bryan O'Sullivan
License : BSD-style
Maintainer : [email protected]
Stability : provisional
Portability : portable
= Introduction
'HashSet' allows you to store /unique/ elements, providing efficient insertion,
lookups, and deletion. A 'HashSet' makes no guarantees as to the order of its
elements.
If you are storing sets of "Data.Int"s consider using "Data.IntSet" from the
<https://hackage.haskell.org/package/containers containers> package.
== Examples
All the examples below assume @HashSet@ is imported qualified, and use the following @dataStructures@ set.
>>> import qualified Data.HashSet as HashSet
>>> let dataStructures = HashSet.fromList ["Set", "Map", "Graph", "Sequence"]
=== Basic Operations
Check membership in a set:
>>> -- Check if "Map" and "Trie" are in the set of data structures.
>>> HashSet.member "Map" dataStructures
True
>>> HashSet.member "Trie" dataStructures
False
Add a new entry to the set:
>>> let moreDataStructures = HashSet.insert "Trie" dataStructures
>>> HashSet.member "Trie" moreDataStructures
True
Remove the @\"Graph\"@ entry from the set of data structures.
>>> let fewerDataStructures = HashSet.delete "Graph" dataStructures
>>> HashSet.toList fewerDataStructures
["Map","Set","Sequence"]
Create a new set and combine it with our original set.
>>> let unorderedDataStructures = HashSet.fromList ["HashSet", "HashMap"]
>>> HashSet.union dataStructures unorderedDataStructures
fromList ["Map","HashSet","Graph","HashMap","Set","Sequence"]
=== Using custom data with HashSet
To create a @HashSet@ of your custom type, the type must have instances for
'Data.Eq.Eq' and 'Data.Hashable.Hashable'. The @Hashable@ typeclass is defined in the
<https://hackage.haskell.org/package/hashable hashable> package, see the
documentation for information on how to make your type an instance of
@Hashable@.
We'll start by setting up our custom data type:
>>> :set -XDeriveGeneric
>>> import GHC.Generics (Generic)
>>> import Data.Hashable
>>> data Person = Person { name :: String, likesDogs :: Bool } deriving (Show, Eq, Generic)
>>> instance Hashable Person
And now we'll use it!
>>> let people = HashSet.fromList [Person "Lana" True, Person "Joe" False, Person "Simon" True]
>>> HashSet.filter likesDogs people
fromList [Person {name = "Simon", likesDogs = True},Person {name = "Lana", likesDogs = True}]
== Performance
The implementation is based on /hash array mapped tries/. A
'HashSet' is often faster than other 'Data.Ord.Ord'-based set types,
especially when value comparisons are expensive, as in the case of
strings.
Many operations have an average-case complexity of /O(log n)/. The
implementation uses a large base (i.e. 16) so in practice these
operations are constant time.
-}

module Data.HashSet
(
Expand Down
66 changes: 55 additions & 11 deletions Data/HashSet/Base.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
-- A set of /hashable/ values. A set cannot contain duplicate items.
-- A 'HashSet' makes no guarantees as to the order of its elements.
--
-- The implementation is based on /hash array mapped trie/. A
-- The implementation is based on /hash array mapped tries/. A
-- 'HashSet' is often faster than other tree-based set types,
-- especially when value comparison is expensive, as in the case of
-- strings.
Expand All @@ -36,10 +36,6 @@ module Data.HashSet.Base
, empty
, singleton

-- * Combine
, union
, unions

-- * Basic interface
, null
, size
Expand All @@ -50,6 +46,10 @@ module Data.HashSet.Base
-- * Transformations
, map

-- * Combine
, union
, unions

-- * Difference and intersection
, difference
, intersection
Expand Down Expand Up @@ -260,24 +260,39 @@ hashSetDataType :: DataType
hashSetDataType = mkDataType "Data.HashSet.Base.HashSet" [fromListConstr]

-- | /O(1)/ Construct an empty set.
--
-- The result wraps the empty map @H.empty@ in the 'HashSet' constructor.
--
-- >>> HashSet.empty
-- fromList []
empty :: HashSet a
empty = HashSet H.empty

-- | /O(1)/ Construct a set with a single element.
--
-- The element is stored as the only key of the wrapped map, paired with
-- the unit value @()@.
--
-- >>> HashSet.singleton 1
-- fromList [1]
singleton :: Hashable a => a -> HashSet a
singleton a = HashSet (H.singleton a ())
{-# INLINABLE singleton #-}

-- | /O(1)/ Convert to the equivalent 'HashMap'.
-- | /O(1)/ Convert the set to the equivalent 'HashMap' with @()@ values.
--
-- Every element of the set becomes a key of the resulting map.
--
-- >>> HashSet.toMap (HashSet.singleton 1)
-- fromList [(1,())]
toMap :: HashSet a -> HashMap a ()
toMap = asMap

-- | /O(1)/ Convert from the equivalent 'HashMap'.
-- | /O(1)/ Convert from the equivalent 'HashMap' with @()@ values.
--
-- The map is wrapped directly by the 'HashSet' constructor; its keys
-- become the elements of the set.
--
-- >>> HashSet.fromMap (HashMap.singleton 1 ())
-- fromList [1]
fromMap :: HashMap a () -> HashSet a
fromMap = HashSet

-- | /O(n)/ Produce a 'HashSet' of all the keys in the given 'HashMap'.
--
-- The values of the map are discarded: each one is replaced by @()@
-- before the map is converted with 'fromMap'.
--
-- >>> HashSet.keysSet (HashMap.fromList [(1, "a"), (2, "b")])
-- fromList [1,2]
--
-- @since 0.2.10.0
keysSet :: HashMap k a -> HashSet k
keysSet m = fromMap (() <$ m)
Expand All @@ -287,8 +302,6 @@ keysSet m = fromMap (() <$ m)
-- To obtain good performance, the smaller set must be presented as
-- the first argument.
--
-- ==== __Examples__
--
-- >>> union (fromList [1,2]) (fromList [2,3])
-- fromList [1,2,3]
union :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a
Expand All @@ -303,48 +316,79 @@ unions = List.foldl' union empty
{-# INLINE unions #-}

-- | /O(1)/ Return 'True' if this set is empty, 'False' otherwise.
--
-- Implemented as the emptiness check @H.null@ on the underlying map.
--
-- >>> HashSet.null HashSet.empty
-- True
-- >>> HashSet.null (HashSet.singleton 1)
-- False
null :: HashSet a -> Bool
null = H.null . asMap
{-# INLINE null #-}

-- | /O(n)/ Return the number of elements in this set.
--
-- Implemented as @H.size@ on the underlying map, so the element count
-- equals that map's key count.
--
-- >>> HashSet.size HashSet.empty
-- 0
-- >>> HashSet.size (HashSet.fromList [1,2,3])
-- 3
size :: HashSet a -> Int
size = H.size . asMap
{-# INLINE size #-}

-- | /O(log n)/ Return 'True' if the given value is present in this
-- set, 'False' otherwise.
--
-- The value is looked up as a key of the underlying map; any hit counts
-- as membership.
--
-- >>> HashSet.member 1 (HashSet.fromList [1,2,3])
-- True
-- >>> HashSet.member 1 (HashSet.fromList [4,5,6])
-- False
member :: (Eq a, Hashable a) => a -> HashSet a -> Bool
member a s = case H.lookup a (asMap s) of
    Just _ -> True
    _      -> False
{-# INLINABLE member #-}

-- | /O(log n)/ Add the specified value to this set.
--
-- The value is inserted as a key of the underlying map, paired with the
-- unit value @()@.
--
-- >>> HashSet.insert 1 HashSet.empty
-- fromList [1]
insert :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a
insert a = HashSet . H.insert a () . asMap
{-# INLINABLE insert #-}

-- | /O(log n)/ Remove the specified value from this set if
-- present.
-- | /O(log n)/ Remove the specified value from this set if present.
--
-- The value is removed as a key of the underlying map; as the second
-- example shows, deleting an absent value leaves the set's contents
-- unchanged.
--
-- >>> HashSet.delete 1 (HashSet.fromList [1,2,3])
-- fromList [2,3]
-- >>> HashSet.delete 1 (HashSet.fromList [4,5,6])
-- fromList [4,5,6]
delete :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a
delete a = HashSet . H.delete a . asMap
{-# INLINABLE delete #-}

-- | /O(n)/ Transform this set by applying a function to every value.
-- The resulting set may be smaller than the source, because the set is
-- rebuilt with 'fromList' from the mapped elements and the function may
-- send distinct values to the same result.
--
-- >>> HashSet.map show (HashSet.fromList [1,2,3])
-- fromList ["1","2","3"]
map :: (Hashable b, Eq b) => (a -> b) -> HashSet a -> HashSet b
map f = fromList . List.map f . toList
{-# INLINE map #-}

-- | /O(n)/ Difference of two sets. Return elements of the first set
-- not existing in the second.
--
-- Computed as @H.difference@ on the two underlying maps.
--
-- >>> HashSet.difference (HashSet.fromList [1,2,3]) (HashSet.fromList [2,3,4])
-- fromList [1]
difference :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a
difference (HashSet a) (HashSet b) = HashSet (H.difference a b)
{-# INLINABLE difference #-}

-- | /O(n)/ Intersection of two sets. Return elements present in both
-- the first set and the second.
--
-- Computed as @H.intersection@ on the two underlying maps.
--
-- >>> HashSet.intersection (HashSet.fromList [1,2,3]) (HashSet.fromList [2,3,4])
-- fromList [2,3]
intersection :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a
intersection (HashSet a) (HashSet b) = HashSet (H.intersection a b)
{-# INLINABLE intersection #-}
Expand Down

0 comments on commit afcbc77

Please sign in to comment.