From 04e3ca4bb8e6aff06604054928f3d4d641c3642d Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 01:30:18 +0200 Subject: [PATCH 1/6] Optimize updateOrConcatWithKey Fixes #403. --- Data/HashMap/Internal.hs | 108 +++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 20d16c86..b066cfce 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1908,30 +1908,32 @@ intersectionCollisions f h1 h2 ary1 ary2 1 -> Leaf h1 <$> A.read mary 0 _ -> Collision h1 <$> (A.unsafeFreeze =<< A.shrink mary len) | otherwise = Empty + where + -- Say we have + -- @ + -- 1 2 3 4 + -- @ + -- and we search for @3@. Then we can mutate the array to + -- @ + -- undefined 2 1 4 + -- @ + -- We don't actually need to write undefined, we just have to make sure that + -- the next search starts 1 after the current one. + searchSwap :: Eq k => k -> Int -> A.MArray s (Leaf k v) -> ST s (Maybe (Leaf k v)) + searchSwap toFind start = go start toFind start + where + go i0 k i mary + | i >= A.lengthM mary = pure Nothing + | otherwise = do + l@(L k' _v) <- A.read mary i + if k == k' + then do + A.write mary i =<< A.read mary i0 + pure $ Just l + else go i0 k (i + 1) mary + {-# INLINE searchSwap #-} {-# INLINE intersectionCollisions #-} --- | Say we have --- @ --- 1 2 3 4 --- @ --- and we search for @3@. Then we can mutate the array to --- @ --- undefined 2 1 4 --- @ --- We don't actually need to write undefined, we just have to make sure that the next search starts 1 after the current one. -searchSwap :: Eq k => k -> Int -> A.MArray s (Leaf k v) -> ST s (Maybe (Leaf k v)) -searchSwap toFind start = go start toFind start - where - go i0 k i mary - | i >= A.lengthM mary = pure Nothing - | otherwise = do - l@(L k' _v) <- A.read mary i - if k == k' - then do - A.write mary i =<< A.read mary i0 - pure $ Just l - else go i0 k (i + 1) mary -{-# INLINE searchSwap #-} ------------------------------------------------------------------------ -- * Folds @@ -2307,33 +2309,49 @@ updateOrSnocWithKey f k0 v0 ary0 = go k0 v0 ary0 0 (A.length ary0) updateOrConcatWithKey :: Eq k => (k -> v -> v -> (# v #)) -> A.Array (Leaf k v) -> A.Array (Leaf k v) -> A.Array (Leaf k v) updateOrConcatWithKey f ary1 ary2 = A.run $ do - -- TODO: instead of mapping and then folding, should we traverse? - -- We'll have to be careful to avoid allocating pairs or similar. - - -- first: look up the position of each element of ary2 in ary1 - let indices = A.map' (\(L k _) -> indexOf k ary1) ary2 - -- that tells us how large the overlap is: - -- count number of Nothing constructors - let nOnly2 = A.foldl' (\n -> maybe (n+1) (const n)) 0 indices let n1 = A.length ary1 let n2 = A.length ary2 + mary <- A.new (n1 + n2) (A.index ary1 1) -- copy over all elements from ary1 - mary <- A.new_ (n1 + nOnly2) - A.copy ary1 0 mary 0 n1 + A.copy ary1 1 mary 1 (n1-1) -- append or update all elements from ary2 - let go !iEnd !i2 - | i2 >= n2 = return () - | otherwise = case A.index indices i2 of - Just i1 -> do -- key occurs in both arrays, store combination in position i1 - L k v1 <- A.indexM ary1 i1 - L _ v2 <- A.indexM ary2 i2 - case f k v1 v2 of (# v3 #) -> A.write mary i1 (L k v3) - go iEnd (i2+1) - Nothing -> do -- key is only in ary2, append to end - A.write mary iEnd =<< A.indexM ary2 i2 - go (iEnd+1) (i2+1) - go n1 0 - return mary + let go !iEnd !i2 !iMut + | i2 >= n2 = return iEnd + | otherwise = do + l@(L k v2) <- A.indexM ary2 i2 + res <- searchSwap k iMut n2 mary + case res of + Just (L _ v1) -> do -- key occurs in both arrays, store combination in position iMut + case f k v1 v2 of (# v3 #) -> A.write mary iMut (L k v3) + go iEnd (i2+1) (iMut+1) + Nothing -> do -- key is only in ary2, append to end + A.write mary iEnd l + go (iEnd+1) (i2+1) iMut + n <- go n1 0 0 + A.shrink mary n + where + -- Say we have + -- @ + -- 1 2 3 4 + -- @ + -- and we search for @3@. Then we can mutate the array to + -- @ + -- 3 2 1 4 + -- @ + searchSwap :: Eq k => k -> Int -> Int -> A.MArray s (Leaf k v) -> ST s (Maybe (Leaf k v)) + searchSwap toFind start end = go start toFind start + where + go i0 k i mary + | i >= end = pure Nothing + | otherwise = do + l@(L k' _v) <- A.read mary i + if k == k' + then do + A.write mary i =<< A.read mary i0 + A.write mary i0 l + pure $ Just l + else go i0 k (i + 1) mary + {-# INLINE searchSwap #-} {-# INLINABLE updateOrConcatWithKey #-} -- | \(O(n*m)\) Check if the first array is a subset of the second array. From e0d7141dfcdca3023a2ed63e7eda566b0f34cc93 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 02:38:41 +0200 Subject: [PATCH 2/6] Revert new_ optimization Undefineds might be useful for debugging --- Data/HashMap/Internal.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index b066cfce..6e090d7a 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -2311,9 +2311,9 @@ updateOrConcatWithKey :: Eq k => (k -> v -> v -> (# v #)) -> A.Array (Leaf k v) updateOrConcatWithKey f ary1 ary2 = A.run $ do let n1 = A.length ary1 let n2 = A.length ary2 - mary <- A.new (n1 + n2) (A.index ary1 1) + mary <- A.new_ (n1 + n2) -- copy over all elements from ary1 - A.copy ary1 1 mary 1 (n1-1) + A.copy ary1 0 mary 0 n1 -- append or update all elements from ary2 let go !iEnd !i2 !iMut | i2 >= n2 = return iEnd From d670ac748f882931291f074843ab34251b7002b5 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 02:46:26 +0200 Subject: [PATCH 3/6] Fix the bug --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 6e090d7a..ad592fde 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -2319,7 +2319,7 @@ updateOrConcatWithKey f ary1 ary2 = A.run $ do | i2 >= n2 = return iEnd | otherwise = do l@(L k v2) <- A.indexM ary2 i2 - res <- searchSwap k iMut n2 mary + res <- searchSwap k iMut n1 mary case res of Just (L _ v1) -> do -- key occurs in both arrays, store combination in position iMut case f k v1 v2 of (# v3 #) -> A.write mary iMut (L k v3) From 0301a3383389a37ba3d6ad9e38b09857876976fa Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 02:46:40 +0200 Subject: [PATCH 4/6] Revert "Revert new_ optimization" This reverts commit 3315dbaf6b2fe1388d15937a8b59120355586f12. --- Data/HashMap/Internal.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index ad592fde..341fbdea 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -2311,9 +2311,9 @@ updateOrConcatWithKey :: Eq k => (k -> v -> v -> (# v #)) -> A.Array (Leaf k v) updateOrConcatWithKey f ary1 ary2 = A.run $ do let n1 = A.length ary1 let n2 = A.length ary2 - mary <- A.new_ (n1 + n2) + mary <- A.new (n1 + n2) (A.index ary1 1) -- copy over all elements from ary1 - A.copy ary1 0 mary 0 n1 + A.copy ary1 1 mary 1 (n1-1) -- append or update all elements from ary2 let go !iEnd !i2 !iMut | i2 >= n2 = return iEnd From f13f24604573b4d0b4dd5925f7d400f969eb4451 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 02:50:40 +0200 Subject: [PATCH 5/6] Fix array initialization trick --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 341fbdea..46dba689 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -2311,7 +2311,7 @@ updateOrConcatWithKey :: Eq k => (k -> v -> v -> (# v #)) -> A.Array (Leaf k v) updateOrConcatWithKey f ary1 ary2 = A.run $ do let n1 = A.length ary1 let n2 = A.length ary2 - mary <- A.new (n1 + n2) (A.index ary1 1) + mary <- A.new (n1 + n2) (A.index ary1 0) -- copy over all elements from ary1 A.copy ary1 1 mary 1 (n1-1) -- append or update all elements from ary2 From 76175536c6b00f4bbd12da06436e6a57e12ec1f6 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 25 Apr 2022 02:51:35 +0200 Subject: [PATCH 6/6] Add comments --- Data/HashMap/Internal.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 46dba689..e9346cc6 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -2311,8 +2311,9 @@ updateOrConcatWithKey :: Eq k => (k -> v -> v -> (# v #)) -> A.Array (Leaf k v) updateOrConcatWithKey f ary1 ary2 = A.run $ do let n1 = A.length ary1 let n2 = A.length ary2 + -- initialize output array with first element of ary1 mary <- A.new (n1 + n2) (A.index ary1 0) - -- copy over all elements from ary1 + -- copy over remaining elements from ary1 A.copy ary1 1 mary 1 (n1-1) -- append or update all elements from ary2 let go !iEnd !i2 !iMut