From ad0bf437dd8a47db12305e6329eb222e00faf96e Mon Sep 17 00:00:00 2001 From: Nicholas Scheel Date: Fri, 11 Aug 2017 16:30:52 -0500 Subject: [PATCH 01/18] Make splitAt total --- src/Data/String.js | 12 +++--------- src/Data/String.purs | 13 ++++--------- test/Test/Data/String.purs | 24 +++++++++++------------- 3 files changed, 18 insertions(+), 31 deletions(-) diff --git a/src/Data/String.js b/src/Data/String.js index c06bfcd..4286544 100644 --- a/src/Data/String.js +++ b/src/Data/String.js @@ -145,15 +145,9 @@ exports.split = function (sep) { }; }; -exports._splitAt = function (just) { - return function (nothing) { - return function (i) { - return function (s) { - return i >= 0 && i < s.length ? - just({ before: s.substring(0, i), after: s.substring(i) }) : - nothing; - }; - }; +exports.splitAt = function (i) { + return function (s) { + return { before: s.substring(0, i), after: s.substring(i) }; }; }; diff --git a/src/Data/String.purs b/src/Data/String.purs index 6092ef6..7329d22 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -231,15 +231,10 @@ foreign import count :: (Char -> Boolean) -> String -> Int -- | * `split (Pattern " ") "hello world" == ["hello", "world"]` foreign import split :: Pattern -> String -> Array String --- | Returns the substrings of split at the given index, if the index is within bounds. -splitAt :: Int -> String -> Maybe { before :: String, after :: String } -splitAt = _splitAt Just Nothing - -foreign import _splitAt :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Int - -> String - -> Maybe { before :: String, after :: String } +-- | Returns a string split into two substrings at the given index, where +-- | `before` includes all of the characters up to the given index, and `after` +-- | is the rest of the string, from the given index on. +foreign import splitAt :: Int -> String -> { before :: String, after :: String } -- | Converts the string into an array of characters. 
foreign import toCharArray :: String -> Array Char diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs index 94cce8e..454d294 100644 --- a/test/Test/Data/String.purs +++ b/test/Test/Data/String.purs @@ -5,7 +5,7 @@ import Prelude (Unit, Ordering(..), (==), ($), discard, negate, not, (/=), (&&)) import Control.Monad.Eff (Eff) import Control.Monad.Eff.Console (CONSOLE, log) -import Data.Maybe (Maybe(..), isNothing, maybe) +import Data.Maybe (Maybe(..), isNothing) import Data.String import Test.Assert (ASSERT, assert) @@ -160,19 +160,17 @@ testString = do assert $ split (Pattern "d") "abc" == ["abc"] log "splitAt" - let testSplitAt i str res = + let testSplitAt i str r = assert $ case splitAt i str of - Nothing -> - isNothing res - Just { before, after } -> - maybe false (\r -> - r.before == before && r.after == after) res - - testSplitAt 1 "" Nothing - testSplitAt 0 "a" $ Just {before: "", after: "a"} - testSplitAt 1 "ab" $ Just {before: "a", after: "b"} - testSplitAt 3 "aabcc" $ Just {before: "aab", after: "cc"} - testSplitAt (-1) "abc" $ Nothing + { before, after } -> + r.before == before && r.after == after + + testSplitAt 1 "" { before: "", after: "" } + testSplitAt 0 "a" { before: "", after: "a" } + testSplitAt 1 "a" { before: "a", after: "" } + testSplitAt 1 "ab" { before: "a", after: "b" } + testSplitAt 3 "aabcc" { before: "aab", after: "cc" } + testSplitAt (-1) "abc" { before: "", after: "abc" } log "toCharArray" assert $ toCharArray "" == [] From 9f5d65b32002262cb98a536552ae4f8b5ffaa12c Mon Sep 17 00:00:00 2001 From: Nicholas Scheel Date: Fri, 11 Aug 2017 17:07:31 -0500 Subject: [PATCH 02/18] Update documentation for splitAt --- src/Data/String.purs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index 7329d22..d6ef99e 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -232,8 +232,19 @@ foreign import count :: (Char -> Boolean) -> String -> Int 
foreign import split :: Pattern -> String -> Array String -- | Returns a string split into two substrings at the given index, where --- | `before` includes all of the characters up to the given index, and `after` --- | is the rest of the string, from the given index on. +-- | `before` includes all of the characters up to (but not including) the +-- | given index, and `after` is the rest of the string, from the given index +-- | on. +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.) +-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | ``` foreign import splitAt :: Int -> String -> { before :: String, after :: String } -- | Converts the string into an array of characters. From d69ea5f1a9805bfaca16d9314e4db3fffd1292b7 Mon Sep 17 00:00:00 2001 From: Nicholas Scheel Date: Fri, 11 Aug 2017 17:42:48 -0500 Subject: [PATCH 03/18] Make change for CodePoints too --- src/Data/String/CodePoints.purs | 21 ++++++-------- test/Test/Data/String/CodePoints.purs | 41 +++++++++++++-------------- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 7fe51f3..32ddcb2 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -214,19 +214,16 @@ singletonFallback (CodePoint cp) = fromCharCode lead <> fromCharCode trail --- | Returns a record with strings created from the code points on either side --- | of the given index. If the index is not within the string, Nothing is --- | returned. 
-splitAt :: Int -> String -> Maybe { before :: String, after :: String } +-- | Splits a string into two substrings, where `before` contains the code +-- | points up to (but not including) the given index, and `after` contains the +-- | rest of the string, from that index on. +splitAt :: Int -> String -> { before :: String, after :: String } splitAt i s = - let cps = toCodePointArray s in - if i < 0 || Array.length cps < i - then Nothing - else Just { - before: fromCodePointArray (Array.take i cps), - after: fromCodePointArray (Array.drop i cps) - } - + let before = take i s in + { before + -- inline drop i s to reuse the result of take i s + , after: String.drop (String.length before) s + } -- | Returns a string containing the given number of code points from the -- | beginning of the given string. If the string does not have that many code diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 7a6aef0..c5d305e 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -137,29 +137,26 @@ testStringCodePoints = do assert $ (singleton <$> codePointFromInt 0x16805) == Just "\x16805" log "splitAt" - let testSplitAt i s res = + let testSplitAt i s r = assert $ case splitAt i s of - Nothing -> - isNothing res - Just { before, after } -> - maybe false (\r -> - r.before == before && r.after == after) res - - testSplitAt 0 "" $ Just {before: "", after: ""} - testSplitAt 1 "" Nothing - testSplitAt 0 "a" $ Just {before: "", after: "a"} - testSplitAt 1 "ab" $ Just {before: "a", after: "b"} - testSplitAt 3 "aabcc" $ Just {before: "aab", after: "cc"} - testSplitAt (-1) "abc" $ Nothing - testSplitAt 0 str $ Just {before: "", after: str} - testSplitAt 1 str $ Just {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06\&z"} - testSplitAt 2 str $ Just {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06\&z"} - testSplitAt 3 str $ Just {before: "a\xDC00\xD800", after: "\xD800\x16805\x16A06\&z"} - 
testSplitAt 4 str $ Just {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06\&z"} - testSplitAt 5 str $ Just {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06\&z"} - testSplitAt 6 str $ Just {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} - testSplitAt 7 str $ Just {before: str, after: ""} - testSplitAt 8 str $ Nothing + { before, after } -> + r.before == before && r.after == after + + testSplitAt 0 "" {before: "", after: "" } + testSplitAt 1 "" {before: "", after: "" } + testSplitAt 0 "a" {before: "", after: "a"} + testSplitAt 1 "ab" {before: "a", after: "b"} + testSplitAt 3 "aabcc" {before: "aab", after: "cc"} + testSplitAt (-1) "abc" {before: "", after: "abc"} + testSplitAt 0 str {before: "", after: str} + testSplitAt 1 str {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06\&z"} + testSplitAt 2 str {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06\&z"} + testSplitAt 3 str {before: "a\xDC00\xD800", after: "\xD800\x16805\x16A06\&z"} + testSplitAt 4 str {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06\&z"} + testSplitAt 5 str {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06\&z"} + testSplitAt 6 str {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} + testSplitAt 7 str {before: str, after: ""} + testSplitAt 8 str {before: str, after: ""} log "take" assert $ take (-1) str == "" From 268ecdaf8a7fed21275c73ff7b4eb7796a166bd5 Mon Sep 17 00:00:00 2001 From: Nicholas Scheel Date: Fri, 11 Aug 2017 17:46:54 -0500 Subject: [PATCH 04/18] Consistency --- src/Data/String.purs | 8 ++++---- test/Test/Data/String.purs | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index d6ef99e..8aa18f5 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -231,10 +231,9 @@ foreign import count :: (Char -> Boolean) -> String -> Int -- | * `split (Pattern " ") "hello world" == ["hello", "world"]` foreign import split :: Pattern -> String -> Array String --- | 
Returns a string split into two substrings at the given index, where --- | `before` includes all of the characters up to (but not including) the --- | given index, and `after` is the rest of the string, from the given index --- | on. +-- | Splits a string into two substrings, where `before` contains the +-- | characters up to (but not including) the given index, and `after` contains +-- | the rest of the string, from that index on. -- | -- | Thus the length of `(splitAt i s).before` will equal either `i` or -- | `length s`, if that is shorter. (Or if `i` is negative the length will be @@ -244,6 +243,7 @@ foreign import split :: Pattern -> String -> Array String -- | ```purescript -- | length (splitAt i s).before == min (max i 0) (length s) -- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} -- | ``` foreign import splitAt :: Int -> String -> { before :: String, after :: String } diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs index 454d294..8cd25f5 100644 --- a/test/Test/Data/String.purs +++ b/test/Test/Data/String.purs @@ -165,12 +165,12 @@ testString = do { before, after } -> r.before == before && r.after == after - testSplitAt 1 "" { before: "", after: "" } - testSplitAt 0 "a" { before: "", after: "a" } - testSplitAt 1 "a" { before: "a", after: "" } - testSplitAt 1 "ab" { before: "a", after: "b" } - testSplitAt 3 "aabcc" { before: "aab", after: "cc" } - testSplitAt (-1) "abc" { before: "", after: "abc" } + testSplitAt 1 "" {before: "", after: ""} + testSplitAt 0 "a" {before: "", after: "a"} + testSplitAt 1 "a" {before: "a", after: ""} + testSplitAt 1 "ab" {before: "a", after: "b"} + testSplitAt 3 "aabcc" {before: "aab", after: "cc"} + testSplitAt (-1) "abc" {before: "", after: "abc"} log "toCharArray" assert $ toCharArray "" == [] From 51aac608607deb59dd95bd6f91cd41a3c06d30f2 Mon Sep 17 00:00:00 2001 From: Nicholas Scheel Date: Sun, 13 Aug 2017 12:14:37 -0500 Subject: [PATCH 05/18] 
Copy the rest of the comment over --- src/Data/String/CodePoints.purs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 32ddcb2..c0a45cd 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -217,6 +217,17 @@ singletonFallback (CodePoint cp) = -- | Splits a string into two substrings, where `before` contains the code -- | points up to (but not including) the given index, and `after` contains the -- | rest of the string, from that index on. +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.) +-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} +-- | ``` splitAt :: Int -> String -> { before :: String, after :: String } splitAt i s = let before = take i s in From 58e66ed83bd65571b9fc0570b2c2da42c72b33b9 Mon Sep 17 00:00:00 2001 From: Matthew Leon Date: Sun, 11 Mar 2018 16:03:16 -0400 Subject: [PATCH 06/18] conversions between NonEmptyArray/NonEmptyString addresses https://github.com/purescript/purescript-strings/issues/101 This uses unsafePartial for the sake of efficiency. 
--- bower.json | 2 +- src/Data/String/NonEmpty.purs | 12 ++++++++++++ test/Test/Data/String/NonEmpty.purs | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bower.json b/bower.json index 385c62f..d9f3936 100644 --- a/bower.json +++ b/bower.json @@ -22,7 +22,7 @@ "purescript-maybe": "^3.0.0", "purescript-partial": "^1.2.0", "purescript-unfoldable": "^3.0.0", - "purescript-arrays": "^4.0.1", + "purescript-arrays": "^4.3.0", "purescript-integers": "^3.2.0" }, "devDependencies": { diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs index b3d93a9..0fdd629 100644 --- a/src/Data/String/NonEmpty.purs +++ b/src/Data/String/NonEmpty.purs @@ -10,12 +10,14 @@ module Data.String.NonEmpty , fromString , unsafeFromString , fromCharArray + , fromNonEmptyCharArray , singleton , cons , snoc , fromFoldable1 , toString , toCharArray + , toNonEmptyCharArray , charAt , charCodeAt , toChar @@ -52,6 +54,8 @@ module Data.String.NonEmpty import Prelude +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NEA import Data.Foldable (class Foldable) import Data.Foldable as F import Data.Maybe (Maybe(..), fromJust) @@ -60,6 +64,7 @@ import Data.Semigroup.Foldable as F1 import Data.String (Pattern(..)) import Data.String as String import Data.String.Unsafe as U +import Partial.Unsafe (unsafePartial) import Unsafe.Coerce (unsafeCoerce) -- | A string that is known not to be empty. @@ -110,6 +115,9 @@ fromCharArray = case _ of [] -> Nothing cs -> Just (NonEmptyString (String.fromCharArray cs)) +fromNonEmptyCharArray :: NonEmptyArray Char -> NonEmptyString +fromNonEmptyCharArray = unsafePartial fromJust <<< fromCharArray <<< NEA.toArray + -- | Creates a `NonEmptyString` from a character. 
singleton :: Char -> NonEmptyString singleton = NonEmptyString <<< String.singleton @@ -181,6 +189,10 @@ toChar (NonEmptyString s) = String.toChar s toCharArray :: NonEmptyString -> Array Char toCharArray (NonEmptyString s) = String.toCharArray s +-- | Converts the `NonEmptyString` into a non-empty array of characters. +toNonEmptyCharArray :: NonEmptyString -> NonEmptyArray Char +toNonEmptyCharArray = unsafePartial fromJust <<< NEA.fromArray <<< toCharArray + -- | Appends a string to this non-empty string. Since one of the strings is -- | non-empty we know the result will be too. -- | diff --git a/test/Test/Data/String/NonEmpty.purs b/test/Test/Data/String/NonEmpty.purs index fc80784..c473ff7 100644 --- a/test/Test/Data/String/NonEmpty.purs +++ b/test/Test/Data/String/NonEmpty.purs @@ -4,6 +4,7 @@ import Data.String.NonEmpty import Control.Monad.Eff (Eff) import Control.Monad.Eff.Console (CONSOLE, log) +import Data.Array.NonEmpty as NEA import Data.Array.Partial as AP import Data.Foldable (class Foldable, foldl) import Data.Maybe (Maybe(..), fromJust, isNothing, maybe) @@ -22,6 +23,9 @@ testNonEmptyString = do assert $ fromCharArray [] == Nothing assert $ fromCharArray ['a', 'b'] == Just (nes "ab") + log "fromNonEmptyCharArray" + assert $ fromNonEmptyCharArray (NEA.singleton 'b') == singleton 'b' + log "singleton" assert $ singleton 'a' == nes "a" @@ -64,6 +68,10 @@ testNonEmptyString = do assert $ toCharArray (nes "ab") == ['a', 'b'] assert $ toCharArray (nes "Hello☺\n") == ['H','e','l','l','o','☺','\n'] + log "toNonEmptyCharArray" + assert $ toNonEmptyCharArray (nes "ab") + == unsafePartial fromJust (NEA.fromArray ['a', 'b']) + log "appendString" assert $ appendString (nes "Hello") " world" == nes "Hello world" assert $ appendString (nes "Hello") "" == nes "Hello" From 1f8411dfe6b2e54166ddba9c5179e0ce16efe18d Mon Sep 17 00:00:00 2001 From: Matthew Leon Date: Sun, 1 Apr 2018 14:57:25 -0400 Subject: [PATCH 07/18] add benchmarks --- bench/Main.purs | 57 
+++++++++++++++++++++++++++++++++++++++++++++++++ bower.json | 3 ++- package.json | 6 +++++- 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 bench/Main.purs diff --git a/bench/Main.purs b/bench/Main.purs new file mode 100644 index 0000000..87c1ca0 --- /dev/null +++ b/bench/Main.purs @@ -0,0 +1,57 @@ +module Bench.Main where + +import Prelude + +import Control.Monad.Eff (Eff) +import Control.Monad.Eff.Console (CONSOLE, log) +import Data.Array.NonEmpty (fromArray) +import Data.Maybe (fromJust) +import Data.String (toCharArray) +import Data.String.NonEmpty (fromFoldable1, fromNonEmptyCharArray) +import Partial.Unsafe (unsafePartial) +import Performance.Minibench (benchWith) + +main :: Eff (console :: CONSOLE) Unit +main = do + log "NonEmpty conversions" + log "======" + log "" + benchNonEmptyConversions + +benchNonEmptyConversions :: Eff (console :: CONSOLE) Unit +benchNonEmptyConversions = do + log "fromNonEmptyCharArray: short" + log "---" + benchFromNonEmptyCharArray + log "" + + log "fromFoldable1" + log "---" + benchFromFoldable1 + log "" + + where + + benchFromNonEmptyCharArray = do + log "short string" + bench \_ -> fromNonEmptyCharArray shortStringArr + + log "long string" + bench \_ -> fromNonEmptyCharArray longStringArr + + benchFromFoldable1 = do + log "short string" + bench \_ -> fromFoldable1 shortStringArr + + log "long string" + bench \_ -> fromFoldable1 longStringArr + + shortStringArr = unsafePartial fromJust $ fromArray + $ toCharArray "supercalifragilisticexpialidocious" + longStringArr = unsafePartial fromJust $ fromArray + $ toCharArray loremIpsum + + bench = benchWith 100000 + +loremIpsum :: String +loremIpsum = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut aliquet euismod ligula, vitae lacinia lorem imperdiet nec. Nulla volutpat ullamcorper mollis. Proin interdum quam a sem auctor, id tempus nisl pretium. Suspendisse potenti. Quisque ut libero consequat, suscipit sem a, malesuada nisi. 
Aliquam dictum odio mi, eu laoreet felis scelerisque non. Ut in odio vehicula, cursus augue sed, tincidunt lorem. Vestibulum consequat lectus eu commodo vulputate. Nam vitae faucibus ipsum. Curabitur sit amet neque sed est sagittis vehicula nec nec risus. Phasellus consectetur cursus malesuada. Vestibulum commodo lorem ut mauris mollis faucibus. Integer ut massa auctor, scelerisque nisi nec, rutrum nisl. Integer vel ex sem. Sed purus felis, molestie eget cursus vel, maximus ut augue. Curabitur nunc ligula, lobortis vitae vehicula a, volutpat nec sem. Phasellus non sapien ipsum. Mauris dolor justo, mollis at elit a, sollicitudin commodo quam. Curabitur posuere felis at nunc pharetra, eu convallis lectus dapibus. Aliquam ullamcorper porta fermentum. Donec at tellus metus. Donec pharetra tempor odio sit amet viverra. Nam vel metus libero. Vivamus maximus quis lacus id pharetra. Duis sed diam molestie, sodales leo id, pulvinar justo. In non augue tempor risus consectetur hendrerit. In libero nulla, elementum non ultrices eu, vehicula non ipsum. Maecenas in hendrerit tellus, sodales dignissim turpis. Ut odio diam, convallis in elit non, consequat gravida nisi. Cras egestas metus eleifend sapien efficitur, vel vulputate est porta. Aliquam posuere, magna nec bibendum luctus, quam risus efficitur sapien, id volutpat metus ex non lorem. Praesent velit eros, efficitur sed tortor quis, lobortis eleifend ligula. Sed tellus quam, aliquet vitae sagittis a, egestas eget massa. Etiam odio elit, hendrerit vel dui vel, fermentum pharetra neque. Curabitur quis mauris id lacus consectetur rhoncus non nec mauris. Mauris blandit tempor pretium. Donec non nisi finibus, lobortis dolor vitae, euismod arcu. Nullam scelerisque lacus in dolor volutpat mollis. Nunc vitae consectetur ligula, quis laoreet quam.Proin sit amet nisi eu orci hendrerit imperdiet vitae sit amet leo. Donec sodales id ante eget viverra. Nullam vitae elit in mauris accumsan feugiat id a velit. Nulla facilisi. 
Cras in turpis efficitur, consectetur justo quis, suscipit tortor. Sed tincidunt pellentesque sapien, in ultricies eros rhoncus sit amet. Integer blandit ornare lobortis. Duis dictum sit amet mauris sit amet cursus. Nullam nec nisl mauris. Praesent cursus imperdiet mi mattis luctus. Donec in tortor fermentum, efficitur turpis vel, facilisis augue. Integer egestas nisl et magna volutpat ornare. Donec pulvinar risus elit, eget viverra est feugiat in.Ut nec ante vestibulum neque pulvinar pretium sit amet eu nisi. Aliquam erat volutpat. Maecenas egestas nisi et mi congue, sed ultricies nibh posuere. Suspendisse potenti. Donec a nulla et velit elementum pretium. Pellentesque gravida imperdiet sem et varius. Praesent ac diam diam. Donec iaculis risus ex, ac eleifend sapien luctus ut. Fusce aliquet, lacus tincidunt porta malesuada, massa augue commodo nulla, ac malesuada tortor est sed eros. Praesent mattis, nisi eget ullamcorper vestibulum, lacus ante placerat metus, ac ullamcorper ante tellus vel nulla. Praesent vehicula in est sit amet varius. Sed facilisis felis sed sem porttitor rutrum. Etiam sollicitudin erat neque, id gravida metus scelerisque quis. Proin venenatis pharetra lectus ac auctor." 
diff --git a/bower.json b/bower.json index d9f3936..a20beba 100644 --- a/bower.json +++ b/bower.json @@ -27,6 +27,7 @@ }, "devDependencies": { "purescript-assert": "^3.0.0", - "purescript-console": "^3.0.0" + "purescript-console": "^3.0.0", + "purescript-minibench": "^1.0.1" } } diff --git a/package.json b/package.json index 132cefc..0f1ba83 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,11 @@ "scripts": { "clean": "rimraf output && rimraf .pulp-cache", "build": "eslint src && pulp build -- --censor-lib --strict", - "test": "pulp test" + "test": "pulp test", + + "bench:build": "purs compile 'bench/**/*.purs' 'src/**/*.purs' 'bower_components/*/src/**/*.purs'", + "bench:run": "node --expose-gc -e 'require(\"./output/Bench.Main/index.js\").main()'", + "bench": "npm run bench:build && npm run bench:run" }, "devDependencies": { "eslint": "^3.17.1", From 00030c2c1f2168f662659f06de559ee0f3648fff Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Tue, 17 Apr 2018 01:25:53 +0100 Subject: [PATCH 08/18] Updates for 0.12 --- bower.json | 18 +++++++++--------- src/Data/String/Regex/Flags.purs | 2 -- test/Test/Data/Char.purs | 8 ++++---- test/Test/Data/String.purs | 8 ++++---- test/Test/Data/String/CaseInsensitive.purs | 8 ++++---- test/Test/Data/String/CodePoints.purs | 8 ++++---- test/Test/Data/String/NonEmpty.purs | 8 ++++---- test/Test/Data/String/Regex.purs | 8 ++++---- test/Test/Data/String/Unsafe.purs | 8 ++++---- test/Test/Main.purs | 7 +++---- 10 files changed, 40 insertions(+), 43 deletions(-) diff --git a/bower.json b/bower.json index 385c62f..1fe3ac2 100644 --- a/bower.json +++ b/bower.json @@ -17,16 +17,16 @@ "package.json" ], "dependencies": { - "purescript-either": "^3.0.0", - "purescript-gen": "^1.1.0", - "purescript-maybe": "^3.0.0", - "purescript-partial": "^1.2.0", - "purescript-unfoldable": "^3.0.0", - "purescript-arrays": "^4.0.1", - "purescript-integers": "^3.2.0" + "purescript-either": "#compiler/0.12", + "purescript-gen": "#compiler/0.12", + 
"purescript-maybe": "#compiler/0.12", + "purescript-partial": "#compiler/0.12", + "purescript-unfoldable": "#compiler/0.12", + "purescript-arrays": "#compiler/0.12", + "purescript-integers": "#compiler/0.12" }, "devDependencies": { - "purescript-assert": "^3.0.0", - "purescript-console": "^3.0.0" + "purescript-assert": "#compiler/0.12", + "purescript-console": "#compiler/0.12" } } diff --git a/src/Data/String/Regex/Flags.purs b/src/Data/String/Regex/Flags.purs index 2684498..bd14d8c 100644 --- a/src/Data/String/Regex/Flags.purs +++ b/src/Data/String/Regex/Flags.purs @@ -3,8 +3,6 @@ module Data.String.Regex.Flags where import Prelude import Control.MonadPlus (guard) - -import Data.Monoid (class Monoid) import Data.String (joinWith) type RegexFlagsRec = diff --git a/test/Test/Data/Char.purs b/test/Test/Data/Char.purs index ee86573..869a2ca 100644 --- a/test/Test/Data/Char.purs +++ b/test/Test/Data/Char.purs @@ -2,14 +2,14 @@ module Test.Data.Char (testChar) where import Prelude (Unit, (==), ($), discard) -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.Char -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testChar :: forall eff. 
Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testChar :: Effect Unit testChar = do log "toCharCode" assert $ toCharCode 'a' == 97 diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs index eb2cf90..24f0a4f 100644 --- a/test/Test/Data/String.purs +++ b/test/Test/Data/String.purs @@ -2,15 +2,15 @@ module Test.Data.String (testString) where import Prelude (Unit, Ordering(..), (==), ($), discard, negate, not, (/=), (&&)) -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.Maybe (Maybe(..), isNothing, maybe) import Data.String -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testString :: forall eff. Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testString :: Effect Unit testString = do log "charAt" assert $ charAt 0 "" == Nothing diff --git a/test/Test/Data/String/CaseInsensitive.purs b/test/Test/Data/String/CaseInsensitive.purs index 9bd4530..ec7d4bf 100644 --- a/test/Test/Data/String/CaseInsensitive.purs +++ b/test/Test/Data/String/CaseInsensitive.purs @@ -2,14 +2,14 @@ module Test.Data.String.CaseInsensitive (testCaseInsensitiveString) where import Prelude (Unit, (==), ($), discard, compare, Ordering(..)) -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.String.CaseInsensitive -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testCaseInsensitiveString :: forall eff. 
Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testCaseInsensitiveString :: Effect Unit testCaseInsensitiveString = do log "equality" assert $ CaseInsensitiveString "aB" == CaseInsensitiveString "AB" diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 75c3573..56ba501 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -2,19 +2,19 @@ module Test.Data.String.CodePoints (testStringCodePoints) where import Prelude -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.Char (fromCharCode) import Data.Maybe (Maybe(..), isNothing, maybe) import Data.String.CodePoints -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) str :: String str = "a\xDC00\xD800\xD800\x16805\x16A06\&z" -testStringCodePoints :: forall eff. Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testStringCodePoints :: Effect Unit testStringCodePoints = do log "show" assert $ map show (codePointAt 0 str) == Just "(CodePoint 0x61)" diff --git a/test/Test/Data/String/NonEmpty.purs b/test/Test/Data/String/NonEmpty.purs index fc80784..e9cd771 100644 --- a/test/Test/Data/String/NonEmpty.purs +++ b/test/Test/Data/String/NonEmpty.purs @@ -2,17 +2,17 @@ module Test.Data.String.NonEmpty (testNonEmptyString) where import Data.String.NonEmpty -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.Array.Partial as AP import Data.Foldable (class Foldable, foldl) import Data.Maybe (Maybe(..), fromJust, isNothing, maybe) import Data.Semigroup.Foldable (class Foldable1, foldMap1Default) import Partial.Unsafe (unsafePartial) import Prelude (class Functor, Ordering(..), Unit, append, discard, negate, not, ($), (&&), (/=), (==)) -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testNonEmptyString :: forall eff. 
Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testNonEmptyString :: Effect Unit testNonEmptyString = do log "fromString" assert $ fromString "" == Nothing diff --git a/test/Test/Data/String/Regex.purs b/test/Test/Data/String/Regex.purs index 6228b91..0faaae2 100644 --- a/test/Test/Data/String/Regex.purs +++ b/test/Test/Data/String/Regex.purs @@ -2,8 +2,8 @@ module Test.Data.String.Regex (testStringRegex) where import Prelude (Unit, ($), (<>), discard, (==), not) -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.Either (isLeft) import Data.Maybe (Maybe(..)) @@ -11,9 +11,9 @@ import Data.String.Regex import Data.String.Regex.Flags (global, ignoreCase, noFlags) import Data.String.Regex.Unsafe (unsafeRegex) -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testStringRegex :: forall eff. Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testStringRegex :: Effect Unit testStringRegex = do log "regex" assert $ test (unsafeRegex "^a" noFlags) "abc" diff --git a/test/Test/Data/String/Unsafe.purs b/test/Test/Data/String/Unsafe.purs index bd2924d..c87055a 100644 --- a/test/Test/Data/String/Unsafe.purs +++ b/test/Test/Data/String/Unsafe.purs @@ -2,14 +2,14 @@ module Test.Data.String.Unsafe (testStringUnsafe) where import Prelude (Unit, (==), ($), discard) -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) +import Effect (Effect) +import Effect.Console (log) import Data.String.Unsafe -import Test.Assert (ASSERT, assert) +import Test.Assert (assert) -testStringUnsafe :: forall eff. 
Eff (console :: CONSOLE, assert :: ASSERT | eff) Unit +testStringUnsafe :: Effect Unit testStringUnsafe = do log "charCodeAt" assert $ charCodeAt 0 "ab" == 97 diff --git a/test/Test/Main.purs b/test/Test/Main.purs index e81600f..92d6cd1 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -2,9 +2,8 @@ module Test.Main where import Prelude -import Control.Monad.Eff (Eff) -import Control.Monad.Eff.Console (CONSOLE, log) -import Test.Assert (ASSERT) +import Effect (Effect) +import Effect.Console (log) import Test.Data.Char (testChar) import Test.Data.String (testString) import Test.Data.String.CaseInsensitive (testCaseInsensitiveString) @@ -13,7 +12,7 @@ import Test.Data.String.NonEmpty (testNonEmptyString) import Test.Data.String.Regex (testStringRegex) import Test.Data.String.Unsafe (testStringUnsafe) -main :: Eff (console :: CONSOLE, assert :: ASSERT) Unit +main :: Effect Unit main = do log "\n--- Data.Char ---\n" testChar From d6c6b58e5b733899233f6dc9e8de1c83eafc719b Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Fri, 18 May 2018 15:39:11 +0100 Subject: [PATCH 09/18] Use NonEmptyArray for Regex match --- src/Data/String/Regex.js | 2 +- src/Data/String/Regex.purs | 5 +++-- test/Test/Data/String/Regex.purs | 20 ++++++++++++-------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Data/String/Regex.js b/src/Data/String/Regex.js index 2e3ac3b..0b84c5b 100644 --- a/src/Data/String/Regex.js +++ b/src/Data/String/Regex.js @@ -46,7 +46,7 @@ exports._match = function (just) { return function (r) { return function (s) { var m = s.match(r); - if (m == null) { + if (m == null || m.length === 0) { return nothing; } else { for (var i = 0; i < m.length; i++) { diff --git a/src/Data/String/Regex.purs b/src/Data/String/Regex.purs index d60b9ee..79a1a9f 100644 --- a/src/Data/String/Regex.purs +++ b/src/Data/String/Regex.purs @@ -18,6 +18,7 @@ module Data.String.Regex import Prelude +import Data.Array.NonEmpty (NonEmptyArray) import Data.Either 
(Either(..)) import Data.Maybe (Maybe(..)) import Data.String (Pattern(..), contains) @@ -82,13 +83,13 @@ foreign import _match -> (forall r. Maybe r) -> Regex -> String - -> Maybe (Array (Maybe String)) + -> Maybe (NonEmptyArray (Maybe String)) -- | Matches the string against the `Regex` and returns an array of matches -- | if there were any. Each match has type `Maybe String`, where `Nothing` -- | represents an unmatched optional capturing group. -- | See [reference](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match). -match :: Regex -> String -> Maybe (Array (Maybe String)) +match :: Regex -> String -> Maybe (NonEmptyArray (Maybe String)) match = _match Just Nothing -- | Replaces occurences of the `Regex` with the first string. The replacement diff --git a/test/Test/Data/String/Regex.purs b/test/Test/Data/String/Regex.purs index 0faaae2..326b35e 100644 --- a/test/Test/Data/String/Regex.purs +++ b/test/Test/Data/String/Regex.purs @@ -1,16 +1,16 @@ module Test.Data.String.Regex (testStringRegex) where -import Prelude (Unit, ($), (<>), discard, (==), not) - -import Effect (Effect) -import Effect.Console (log) +import Data.String.Regex +import Data.Array.NonEmpty (NonEmptyArray, fromArray) import Data.Either (isLeft) -import Data.Maybe (Maybe(..)) -import Data.String.Regex +import Data.Maybe (Maybe(..), fromJust) import Data.String.Regex.Flags (global, ignoreCase, noFlags) import Data.String.Regex.Unsafe (unsafeRegex) - +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Prelude (type (~>), Unit, discard, not, ($), (<<<), (<>), (==)) import Test.Assert (assert) testStringRegex :: Effect Unit @@ -26,7 +26,8 @@ testStringRegex = do assert $ "quxbarquxbaz" == replace (unsafeRegex "foo" (global <> ignoreCase)) "qux" "foobarFOObaz" log "match" - assert $ match (unsafeRegex "^abc$" noFlags) "abc" == Just [Just "abc"] + assert $ match (unsafeRegex "^abc$" noFlags) "abc" == Just (nea 
[Just "abc"]) + assert $ match (unsafeRegex "^abc$" noFlags) "xyz" == Nothing log "replace" assert $ replace (unsafeRegex "-" noFlags) "!" "a-b-c" == "a!b-c" @@ -50,3 +51,6 @@ testStringRegex = do let pattern = unsafeRegex "a" (parseFlags "g") assert $ test pattern "a" assert $ test pattern "a" + +nea :: Array ~> NonEmptyArray +nea = unsafePartial fromJust <<< fromArray From bc164b95a6d07ae6159230fcf448b9f007f5bb92 Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Fri, 18 May 2018 16:01:12 +0100 Subject: [PATCH 10/18] Rename count to countPrefix #81 --- src/Data/String.js | 2 +- src/Data/String.purs | 10 +++++----- src/Data/String/CodePoints.js | 2 +- src/Data/String/CodePoints.purs | 14 +++++++------- src/Data/String/NonEmpty.purs | 8 ++++---- test/Test/Data/String.purs | 10 +++++----- test/Test/Data/String/CodePoints.purs | 12 ++++++------ test/Test/Data/String/NonEmpty.purs | 10 +++++----- 8 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/Data/String.js b/src/Data/String.js index 1c46b9a..3079ddc 100644 --- a/src/Data/String.js +++ b/src/Data/String.js @@ -139,7 +139,7 @@ exports._slice = function (b) { }; }; -exports.count = function (p) { +exports.countPrefix = function (p) { return function (s) { var i = 0; while (i < s.length && p(s.charAt(i))) i++; diff --git a/src/Data/String.purs b/src/Data/String.purs index c377316..4c6843e 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -29,7 +29,7 @@ module Data.String , slice , stripPrefix , stripSuffix - , count + , countPrefix , split , splitAt , toCharArray @@ -162,7 +162,7 @@ uncons s = Just { head: U.charAt zero s, tail: drop one s } -- | ``` -- | takeWhile :: (Char -> Boolean) -> String -> String -takeWhile p s = take (count p s) s +takeWhile p s = take (countPrefix p s) s -- | Returns the suffix remaining after `takeWhile`. 
-- | @@ -171,7 +171,7 @@ takeWhile p s = take (count p s) s -- | ``` -- | dropWhile :: (Char -> Boolean) -> String -> String -dropWhile p s = drop (count p s) s +dropWhile p s = drop (countPrefix p s) s -- | Returns the substring at indices `[begin, end)`. -- | If either index is negative, it is normalised to `length s - index`, @@ -407,10 +407,10 @@ dropRight i s = take (length s - i) s -- | of the string for which the predicate holds. -- | -- | ```purescript --- | count (_ /= ' ') "Hello World" == 5 -- since length "Hello" == 5 +-- | countPrefix (_ /= ' ') "Hello World" == 5 -- since length "Hello" == 5 -- | ``` -- | -foreign import count :: (Char -> Boolean) -> String -> Int +foreign import countPrefix :: (Char -> Boolean) -> String -> Int -- | Returns the substrings of the second string separated along occurences -- | of the first string. diff --git a/src/Data/String/CodePoints.js b/src/Data/String/CodePoints.js index 1c73483..eead7f6 100644 --- a/src/Data/String/CodePoints.js +++ b/src/Data/String/CodePoints.js @@ -40,7 +40,7 @@ exports._codePointAt = function (fallback) { }; }; -exports._count = function (fallback) { +exports._countPrefix = function (fallback) { return function (unsafeCodePointAt0) { if (hasStringIterator) { return function (pred) { diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index a48a764..d51d180 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -9,7 +9,7 @@ module Data.String.CodePoints , codePointFromInt , codePointToInt , codePointFromChar - , count + , countPrefix , drop , dropWhile , fromCodePointArray @@ -168,14 +168,14 @@ codePointAtFallback n s = case uncons s of -- | time linear to the length of the string. 
-- | -- | ```purescript --- | >>> count (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | >>> countPrefix (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀" -- | 2 -- | ``` -- | -count :: (CodePoint -> Boolean) -> String -> Int -count = _count countFallback unsafeCodePointAt0 +countPrefix :: (CodePoint -> Boolean) -> String -> Int +countPrefix = _countPrefix countFallback unsafeCodePointAt0 -foreign import _count +foreign import _countPrefix :: ((CodePoint -> Boolean) -> String -> Int) -> (String -> CodePoint) -> (CodePoint -> Boolean) @@ -217,7 +217,7 @@ drop n s = String.drop (String.length (take n s)) s -- | ``` -- | dropWhile :: (CodePoint -> Boolean) -> String -> String -dropWhile p s = drop (count p s) s +dropWhile p s = drop (countPrefix p s) s -- | Creates a string from an array of code points. Operates in space and time @@ -402,7 +402,7 @@ takeFallback n s = case uncons s of -- | ``` -- | takeWhile :: (CodePoint -> Boolean) -> String -> String -takeWhile p s = take (count p s) s +takeWhile p s = take (countPrefix p s) s -- | Creates an array of code points from a string. Operates in space and time diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs index 99396df..a6f50e2 100644 --- a/src/Data/String/NonEmpty.purs +++ b/src/Data/String/NonEmpty.purs @@ -41,7 +41,7 @@ module Data.String.NonEmpty , dropWhile , stripPrefix , stripSuffix - , count + , countPrefix , splitAt , toLower , toUpper @@ -409,10 +409,10 @@ dropRight i (NonEmptyString s) -- | for which the predicate holds. -- | -- | ```purescript --- | count (_ /= 'o') (NonEmptyString "Hello World") == 4 +-- | countPrefix (_ /= 'o') (NonEmptyString "Hello World") == 4 -- | ``` -count :: (Char -> Boolean) -> NonEmptyString -> Int -count = liftS <<< String.count +countPrefix :: (Char -> Boolean) -> NonEmptyString -> Int +countPrefix = liftS <<< String.countPrefix -- | Returns the substrings of a split at the given index, if the index is -- | within bounds. 
diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs index a99fbbe..855ee5c 100644 --- a/test/Test/Data/String.purs +++ b/test/Test/Data/String.purs @@ -160,11 +160,11 @@ testString = do assert $ dropRight 3 "ab" == "" assert $ dropRight (-1) "ab" == "ab" - log "count" - assert $ count (_ == 'a') "" == 0 - assert $ count (_ == 'a') "ab" == 1 - assert $ count (_ == 'a') "aaab" == 3 - assert $ count (_ == 'a') "abaa" == 1 + log "countPrefix" + assert $ countPrefix (_ == 'a') "" == 0 + assert $ countPrefix (_ == 'a') "ab" == 1 + assert $ countPrefix (_ == 'a') "aaab" == 3 + assert $ countPrefix (_ == 'a') "abaa" == 1 log "split" assert $ split (Pattern "") "" == [] diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 7d7a8cc..29c282d 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -41,12 +41,12 @@ testStringCodePoints = do assert $ Just (codePointFromChar $ fromCharCode 0) == codePointFromInt 0 assert $ Just (codePointFromChar $ fromCharCode 0xFFFF) == codePointFromInt 0xFFFF - log "count" - assert $ count (\_ -> true) "" == 0 - assert $ count (\_ -> false) str == 0 - assert $ count (\_ -> true) str == 7 - assert $ count (\x -> codePointToInt x < 0xFFFF) str == 4 - assert $ count (\x -> codePointToInt x < 0xDC00) str == 1 + log "countPrefix" + assert $ countPrefix (\_ -> true) "" == 0 + assert $ countPrefix (\_ -> false) str == 0 + assert $ countPrefix (\_ -> true) str == 7 + assert $ countPrefix (\x -> codePointToInt x < 0xFFFF) str == 4 + assert $ countPrefix (\x -> codePointToInt x < 0xDC00) str == 1 log "drop" assert $ drop (-1) str == str diff --git a/test/Test/Data/String/NonEmpty.purs b/test/Test/Data/String/NonEmpty.purs index 89b389f..e1f0983 100644 --- a/test/Test/Data/String/NonEmpty.purs +++ b/test/Test/Data/String/NonEmpty.purs @@ -201,11 +201,11 @@ testNonEmptyString = do assert $ dropRight 3 (nes "ab") == Nothing assert $ dropRight (-1) (nes 
"ab") == Just (nes "ab") - log "count" - assert $ count (_ == 'a') (nes "ab") == 1 - assert $ count (_ == 'a') (nes "aaab") == 3 - assert $ count (_ == 'a') (nes "abaa") == 1 - assert $ count (_ == 'c') (nes "abaa") == 0 + log "countPrefix" + assert $ countPrefix (_ == 'a') (nes "ab") == 1 + assert $ countPrefix (_ == 'a') (nes "aaab") == 3 + assert $ countPrefix (_ == 'a') (nes "abaa") == 1 + assert $ countPrefix (_ == 'c') (nes "abaa") == 0 log "splitAt" let From e019df0d24f5d93b64dd9a27e8fa8751d04801e4 Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Sat, 19 May 2018 01:28:48 +0100 Subject: [PATCH 11/18] WIP --- src/Data/String.js | 128 -------- src/Data/String.purs | 361 +---------------------- src/Data/String/CodePoint.purs | 446 ++++++++++++++++++++++++++++ src/Data/String/CodePoints.purs | 19 +- src/Data/String/CodeUnits.js | 119 ++++++++ src/Data/String/CodeUnits.purs | 294 +++++++++++++++++++ src/Data/String/Gen.purs | 5 +- src/Data/String/NonEmpty.purs | 499 -------------------------------- src/Data/String/Pattern.purs | 33 +++ 9 files changed, 907 insertions(+), 997 deletions(-) create mode 100644 src/Data/String/CodePoint.purs create mode 100644 src/Data/String/CodeUnits.js create mode 100644 src/Data/String/CodeUnits.purs delete mode 100644 src/Data/String/NonEmpty.purs create mode 100644 src/Data/String/Pattern.purs diff --git a/src/Data/String.js b/src/Data/String.js index 3079ddc..3237741 100644 --- a/src/Data/String.js +++ b/src/Data/String.js @@ -1,95 +1,5 @@ "use strict"; -exports._charAt = function (just) { - return function (nothing) { - return function (i) { - return function (s) { - return i >= 0 && i < s.length ? just(s.charAt(i)) : nothing; - }; - }; - }; -}; - -exports.singleton = function (c) { - return c; -}; - -exports._charCodeAt = function (just) { - return function (nothing) { - return function (i) { - return function (s) { - return i >= 0 && i < s.length ? 
just(s.charCodeAt(i)) : nothing; - }; - }; - }; -}; - -exports._toChar = function (just) { - return function (nothing) { - return function (s) { - return s.length === 1 ? just(s) : nothing; - }; - }; -}; - -exports.fromCharArray = function (a) { - return a.join(""); -}; - -exports._indexOf = function (just) { - return function (nothing) { - return function (x) { - return function (s) { - var i = s.indexOf(x); - return i === -1 ? nothing : just(i); - }; - }; - }; -}; - -exports["_indexOf'"] = function (just) { - return function (nothing) { - return function (x) { - return function (startAt) { - return function (s) { - if (startAt < 0 || startAt > s.length) return nothing; - var i = s.indexOf(x, startAt); - return i === -1 ? nothing : just(i); - }; - }; - }; - }; -}; - -exports._lastIndexOf = function (just) { - return function (nothing) { - return function (x) { - return function (s) { - var i = s.lastIndexOf(x); - return i === -1 ? nothing : just(i); - }; - }; - }; -}; - -exports["_lastIndexOf'"] = function (just) { - return function (nothing) { - return function (x) { - return function (startAt) { - return function (s) { - if (startAt < 0 || startAt > s.length) return nothing; - var i = s.lastIndexOf(x, startAt); - return i === -1 ? 
nothing : just(i); - }; - }; - }; - }; -}; - -exports.length = function (s) { - return s.length; -}; - exports._localeCompare = function (lt) { return function (eq) { return function (gt) { @@ -119,50 +29,12 @@ exports.replaceAll = function (s1) { }; }; -exports.take = function (n) { - return function (s) { - return s.substr(0, n); - }; -}; - -exports.drop = function (n) { - return function (s) { - return s.substring(n); - }; -}; - -exports._slice = function (b) { - return function (e) { - return function (s) { - return s.slice(b,e); - }; - }; -}; - -exports.countPrefix = function (p) { - return function (s) { - var i = 0; - while (i < s.length && p(s.charAt(i))) i++; - return i; - }; -}; - exports.split = function (sep) { return function (s) { return s.split(sep); }; }; -exports.splitAt = function (i) { - return function (s) { - return { before: s.substring(0, i), after: s.substring(i) }; - }; -}; - -exports.toCharArray = function (s) { - return s.split(""); -}; - exports.toLower = function (s) { return s.toLowerCase(); }; diff --git a/src/Data/String.purs b/src/Data/String.purs index 4c6843e..b6fbd0d 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -2,37 +2,15 @@ -- | A String represents a sequence of characters. -- | For details of the underlying implementation, see [String Reference at MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String). module Data.String - ( Pattern(..) - , Replacement(..) 
- , charAt - , charCodeAt - , fromCharArray - , toChar + ( module Data.String.Pattern , contains - , indexOf - , indexOf' - , lastIndexOf - , lastIndexOf' , null - , uncons - , length - , singleton , localeCompare , replace , replaceAll - , take - , takeRight - , takeWhile - , drop - , dropRight - , dropWhile - , slice , stripPrefix , stripSuffix - , countPrefix , split - , splitAt - , toCharArray , toLower , toUpper , trim @@ -42,95 +20,8 @@ module Data.String import Prelude import Data.Maybe (Maybe(..), isJust) -import Data.Newtype (class Newtype) -import Data.String.Unsafe as U - --- | A newtype used in cases where there is a string to be matched. --- | --- | ```purescript --- | pursPattern = Pattern ".purs" --- | --can be used like this: --- | contains pursPattern "Test.purs" --- | == true --- | ``` --- | -newtype Pattern = Pattern String - -derive instance eqPattern :: Eq Pattern -derive instance ordPattern :: Ord Pattern -derive instance newtypePattern :: Newtype Pattern _ - -instance showPattern :: Show Pattern where - show (Pattern s) = "(Pattern " <> show s <> ")" - --- | A newtype used in cases to specify a replacement for a pattern. -newtype Replacement = Replacement String - -derive instance eqReplacement :: Eq Replacement -derive instance ordReplacement :: Ord Replacement -derive instance newtypeReplacement :: Newtype Replacement _ - -instance showReplacement :: Show Replacement where - show (Replacement s) = "(Replacement " <> show s <> ")" - --- | Returns the character at the given index, if the index is within bounds. --- | --- | ```purescript --- | charAt 2 "Hello" == Just 'l' --- | charAt 10 "Hello" == Nothing --- | ``` --- | -charAt :: Int -> String -> Maybe Char -charAt = _charAt Just Nothing - -foreign import _charAt - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Int - -> String - -> Maybe Char - --- | Returns a string of length `1` containing the given character. 
--- | --- | ```purescript --- | singleton 'l' == "l" --- | ``` --- | -foreign import singleton :: Char -> String - --- | Returns the numeric Unicode value of the character at the given index, --- | if the index is within bounds. --- | ```purescript --- | charCodeAt 2 "5 €" == Just 0x20AC --- | charCodeAt 10 "5 €" == Nothing --- | ``` --- | -charCodeAt :: Int -> String -> Maybe Int -charCodeAt = _charCodeAt Just Nothing - -foreign import _charCodeAt - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Int - -> String - -> Maybe Int - --- | Converts the string to a character, if the length of the string is --- | exactly `1`. --- | --- | ```purescript --- | toChar "l" == Just 'l' --- | toChar "Hi" == Nothing -- since length is not 1 --- | ``` --- | -toChar :: String -> Maybe Char -toChar = _toChar Just Nothing - -foreign import _toChar - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> String - -> Maybe Char +import Data.String.CodeUnits as SCU +import Data.String.Pattern (Pattern(..), Replacement(..)) -- | Returns `true` if the given string is empty. -- | @@ -138,68 +29,9 @@ foreign import _toChar -- | null "" == true -- | null "Hi" == false -- | ``` --- | null :: String -> Boolean null s = s == "" --- | Returns the first character and the rest of the string, --- | if the string is not empty. --- | --- | ```purescript --- | uncons "" == Nothing --- | uncons "Hello World" == Just { head: 'H', tail: "ello World" } --- | ``` --- | -uncons :: String -> Maybe { head :: Char, tail :: String } -uncons "" = Nothing -uncons s = Just { head: U.charAt zero s, tail: drop one s } - --- | Returns the longest prefix (possibly empty) of characters that satisfy --- | the predicate. --- | --- | ```purescript --- | takeWhile (_ /= ':') "http://purescript.org" == "http" --- | ``` --- | -takeWhile :: (Char -> Boolean) -> String -> String -takeWhile p s = take (countPrefix p s) s - --- | Returns the suffix remaining after `takeWhile`. 
--- | --- | ```purescript --- | dropWhile (_ /= '.') "Test.purs" == ".purs" --- | ``` --- | -dropWhile :: (Char -> Boolean) -> String -> String -dropWhile p s = drop (countPrefix p s) s - --- | Returns the substring at indices `[begin, end)`. --- | If either index is negative, it is normalised to `length s - index`, --- | where `s` is the input string. `Nothing` is returned if either --- | index is out of bounds or if `begin > end` after normalisation. --- | --- | ```purescript --- | slice 0 0 "purescript" == Just "" --- | slice 0 1 "purescript" == Just "p" --- | slice 3 6 "purescript" == Just "esc" --- | slice (-4) (-1) "purescript" == Just "rip" --- | slice (-4) 3 "purescript" == Nothing --- | ``` -slice :: Int -> Int -> String -> Maybe String -slice b e s = if b' < 0 || b' >= l || - e' < 0 || e' >= l || - b' > e' - then Nothing - else Just (_slice b e s) - where - l = length s - norm x | x < 0 = l + x - | otherwise = x - b' = norm b - e' = norm e - -foreign import _slice :: Int -> Int -> String -> String - -- | If the string starts with the given prefix, return the portion of the -- | string left after removing it, as a Just value. Otherwise, return Nothing. 
-- | @@ -207,11 +39,10 @@ foreign import _slice :: Int -> Int -> String -> String -- | stripPrefix (Pattern "http:") "http://purescript.org" == Just "//purescript.org" -- | stripPrefix (Pattern "http:") "https://purescript.org" == Nothing -- | ``` --- | stripPrefix :: Pattern -> String -> Maybe String stripPrefix prefix@(Pattern prefixS) str = - case indexOf prefix str of - Just 0 -> Just $ drop (length prefixS) str + case SCU.indexOf prefix str of + Just 0 -> Just $ SCU.drop (SCU.length prefixS) str _ -> Nothing -- | If the string ends with the given suffix, return the portion of the @@ -222,116 +53,20 @@ stripPrefix prefix@(Pattern prefixS) str = -- | stripSuffix (Pattern ".exe") "psc.exe" == Just "psc" -- | stripSuffix (Pattern ".exe") "psc" == Nothing -- | ``` --- | stripSuffix :: Pattern -> String -> Maybe String stripSuffix suffix@(Pattern suffixS) str = - case lastIndexOf suffix str of - Just x | x == length str - length suffixS -> Just $ take x str + case SCU.lastIndexOf suffix str of + Just x | x == SCU.length str - SCU.length suffixS -> Just $ SCU.take x str _ -> Nothing --- | Converts an array of characters into a string. --- | --- | ```purescript --- | fromCharArray ['H', 'e', 'l', 'l', 'o'] == "Hello" --- | ``` --- | -foreign import fromCharArray :: Array Char -> String - -- | Checks whether the pattern appears in the given string. -- | -- | ```purescript -- | contains (Pattern "needle") "haystack with needle" == true -- | contains (Pattern "needle") "haystack" == false -- | ``` --- | contains :: Pattern -> String -> Boolean -contains pat = isJust <<< indexOf pat - --- | Returns the index of the first occurrence of the pattern in the --- | given string. Returns `Nothing` if there is no match. --- | --- | ```purescript --- | indexOf (Pattern "c") "abcdc" == Just 2 --- | indexOf (Pattern "c") "aaa" == Nothing --- | ``` --- | -indexOf :: Pattern -> String -> Maybe Int -indexOf = _indexOf Just Nothing - -foreign import _indexOf - :: (forall a. 
a -> Maybe a) - -> (forall a. Maybe a) - -> Pattern - -> String - -> Maybe Int - --- | Returns the index of the first occurrence of the pattern in the --- | given string, starting at the specified index. Returns `Nothing` if there is --- | no match. --- | --- | ```purescript --- | indexOf' (Pattern "a") 2 "ababa" == Just 2 --- | indexOf' (Pattern "a") 3 "ababa" == Just 4 --- | ``` --- | -indexOf' :: Pattern -> Int -> String -> Maybe Int -indexOf' = _indexOf' Just Nothing - -foreign import _indexOf' - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Pattern - -> Int - -> String - -> Maybe Int - --- | Returns the index of the last occurrence of the pattern in the --- | given string. Returns `Nothing` if there is no match. --- | --- | ```purescript --- | lastIndexOf (Pattern "c") "abcdc" == Just 4 --- | lastIndexOf (Pattern "c") "aaa" == Nothing --- | ``` --- | -lastIndexOf :: Pattern -> String -> Maybe Int -lastIndexOf = _lastIndexOf Just Nothing - -foreign import _lastIndexOf - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Pattern - -> String - -> Maybe Int - --- | Returns the index of the last occurrence of the pattern in the --- | given string, starting at the specified index --- | and searching backwards towards the beginning of the string. --- | Returns `Nothing` if there is no match. --- | --- | ```purescript --- | lastIndexOf' (Pattern "a") 1 "ababa" == Just 0 --- | lastIndexOf' (Pattern "a") 3 "ababa" == Just 2 --- | lastIndexOf' (Pattern "a") 4 "ababa" == Just 4 --- | ``` --- | -lastIndexOf' :: Pattern -> Int -> String -> Maybe Int -lastIndexOf' = _lastIndexOf' Just Nothing - -foreign import _lastIndexOf' - :: (forall a. a -> Maybe a) - -> (forall a. Maybe a) - -> Pattern - -> Int - -> String - -> Maybe Int - --- | Returns the number of characters the string is composed of. 
--- | --- | ```purescript --- | length "Hello World" == 11 --- | ``` --- | -foreign import length :: String -> Int +contains pat = isJust <<< SCU.indexOf pat -- | Compare two strings in a locale-aware fashion. This is in contrast to -- | the `Ord` instance on `String` which treats strings as arrays of code @@ -341,7 +76,6 @@ foreign import length :: String -> Int -- | "ä" `localeCompare` "b" == LT -- | "ä" `compare` "b" == GT -- | ``` --- | localeCompare :: String -> String -> Ordering localeCompare = _localeCompare LT EQ GT @@ -358,7 +92,6 @@ foreign import _localeCompare -- | ```purescript -- | replace (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b <= c" -- | ``` --- | foreign import replace :: Pattern -> Replacement -> String -> String -- | Replaces all occurences of the pattern with the replacement string. @@ -366,96 +99,21 @@ foreign import replace :: Pattern -> Replacement -> String -> String -- | ```purescript -- | replaceAll (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b ≤ c" -- | ``` --- | foreign import replaceAll :: Pattern -> Replacement -> String -> String --- | Returns the first `n` characters of the string. --- | --- | ```purescript --- | take 5 "Hello World" == "Hello" --- | ``` --- | -foreign import take :: Int -> String -> String - --- | Returns the last `n` characters of the string. --- | --- | ```purescript --- | takeRight 5 "Hello World" == "World" --- | ``` --- | -takeRight :: Int -> String -> String -takeRight i s = drop (length s - i) s - --- | Returns the string without the first `n` characters. --- | --- | ```purescript --- | drop 6 "Hello World" == "World" --- | ``` --- | -foreign import drop :: Int -> String -> String - --- | Returns the string without the last `n` characters. 
--- | --- | ```purescript --- | dropRight 6 "Hello World" == "Hello" --- | ``` --- | -dropRight :: Int -> String -> String -dropRight i s = take (length s - i) s - --- | Returns the number of contiguous characters at the beginning --- | of the string for which the predicate holds. --- | --- | ```purescript --- | countPrefix (_ /= ' ') "Hello World" == 5 -- since length "Hello" == 5 --- | ``` --- | -foreign import countPrefix :: (Char -> Boolean) -> String -> Int - -- | Returns the substrings of the second string separated along occurences -- | of the first string. -- | -- | ```purescript -- | split (Pattern " ") "hello world" == ["hello", "world"] -- | ``` --- | foreign import split :: Pattern -> String -> Array String --- | Splits a string into two substrings, where `before` contains the --- | characters up to (but not including) the given index, and `after` contains --- | the rest of the string, from that index on. --- | --- | ```purescript --- | splitAt 2 "Hello World" == { before: "He", after: "llo World"} --- | splitAt 10 "Hi" == { before: "Hi", after: ""} --- | ``` --- | --- | Thus the length of `(splitAt i s).before` will equal either `i` or --- | `length s`, if that is shorter. (Or if `i` is negative the length will be --- | 0.) --- | --- | In code: --- | ```purescript --- | length (splitAt i s).before == min (max i 0) (length s) --- | (splitAt i s).before <> (splitAt i s).after == s --- | splitAt i s == {before: take i s, after: drop i s} --- | ``` -foreign import splitAt :: Int -> String -> { before :: String, after :: String } - --- | Converts the string into an array of characters. --- | --- | ```purescript --- | toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n'] --- | ``` --- | -foreign import toCharArray :: String -> Array Char - -- | Returns the argument converted to lowercase. -- | -- | ```purescript -- | toLower "hElLo" == "hello" -- | ``` --- | foreign import toLower :: String -> String -- | Returns the argument converted to uppercase. 
@@ -463,7 +121,6 @@ foreign import toLower :: String -> String -- | ```purescript -- | toUpper "Hello" == "HELLO" -- | ``` --- | foreign import toUpper :: String -> String -- | Removes whitespace from the beginning and end of a string, including @@ -473,7 +130,6 @@ foreign import toUpper :: String -> String -- | ```purescript -- | trim " Hello \n World\n\t " == "Hello \n World" -- | ``` --- | foreign import trim :: String -> String -- | Joins the strings in the array together, inserting the first argument @@ -482,5 +138,4 @@ foreign import trim :: String -> String -- | ```purescript -- | joinWith ", " ["apple", "banana", "orange"] == "apple, banana, orange" -- | ``` --- | foreign import joinWith :: String -> Array String -> String diff --git a/src/Data/String/CodePoint.purs b/src/Data/String/CodePoint.purs new file mode 100644 index 0000000..b8cec10 --- /dev/null +++ b/src/Data/String/CodePoint.purs @@ -0,0 +1,446 @@ +-- | These functions allow PureScript strings to be treated as if they were +-- | sequences of Unicode code points instead of their true underlying +-- | implementation (sequences of UTF-16 code units). For nearly all uses of +-- | strings, these functions should be preferred over the ones in `Data.String`. +module Data.String.CodePoints + ( CodePoint + , codePointAt + , fromInt + , toInt + , fromChar + , toCodePointArray + , fromCodePointArray + , countPrefix + , drop + , dropWhile + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , length + , singleton + , splitAt + , take + , takeWhile + , uncons + ) where + +import Prelude + +import Data.Array as Array +import Data.Char (toCharCode) +import Data.Char as Char +import Data.Int (hexadecimal, toStringAs) +import Data.Maybe (Maybe(..)) +import Data.String as String +import Data.String.Unsafe as Unsafe +import Data.Tuple (Tuple(..)) +import Data.Unfoldable (unfoldr) + +-- | CodePoint is an Int bounded between 0 and 0x10FFFF, corresponding to +-- | Unicode code points. 
+newtype CodePoint = CodePoint Int + +derive instance eqCodePoint :: Eq CodePoint +derive instance ordCodePoint :: Ord CodePoint + +instance showCodePoint :: Show CodePoint where + show (CodePoint i) = "(CodePoint 0x" <> String.toUpper (toStringAs hexadecimal i) <> ")" + +-- I would prefer that this smart constructor not need to exist and instead +-- CodePoint just implements Enum, but the Enum module already depends on this +-- one. To avoid the circular dependency, we just expose these two functions. +-- | +-- | ```purescript +-- | >>> it = fromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A +-- | Just (CodePoint 0x1D400) +-- | +-- | >>> map singleton it +-- | Just "𝐀" +-- | +-- | >>> fromInt 0x110000 -- does not correspond to a Unicode code point +-- | Nothing +-- | ``` +-- | +fromInt :: Int -> Maybe CodePoint +fromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) +fromInt n = Nothing + +-- | +-- | ```purescript +-- | >>> toInt (fromChar 'B') +-- | 66 +-- | +-- | >>> boldA = fromInt 0x1D400 +-- | >>> boldA +-- | Just (CodePoint 0x1D400) +-- | >>> map toInt boldA +-- | Just 119808 -- is the same as 0x1D400 +-- | ``` +-- | +toInt :: CodePoint -> Int +toInt (CodePoint n) = n + +-- | Creates a CodePoint from a given Char. 
+-- | +-- | ```purescript +-- | >>> fromChar 'B' +-- | CodePoint 0x42 -- represents 'B' +-- | ``` +-- | +fromChar :: Char -> CodePoint +fromChar = toCharCode >>> CodePoint + +unsurrogate :: Int -> Int -> CodePoint +unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) + +isLead :: Int -> Boolean +isLead cu = 0xD800 <= cu && cu <= 0xDBFF + +isTrail :: Int -> Boolean +isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF + +fromCharCode :: Int -> String +fromCharCode = String.singleton <<< Char.fromCharCode + +-- WARN: this function expects the String parameter to be non-empty +unsafeCodePointAt0 :: String -> CodePoint +unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback + +foreign import _unsafeCodePointAt0 + :: (String -> CodePoint) + -> String + -> CodePoint + +unsafeCodePointAt0Fallback :: String -> CodePoint +unsafeCodePointAt0Fallback s = + let cu0 = Unsafe.charCodeAt 0 s in + let cu1 = Unsafe.charCodeAt 1 s in + if isLead cu0 && isTrail cu1 + then unsurrogate cu0 cu1 + else CodePoint cu0 + + +-- | Returns the first code point of the string after dropping the given number +-- | of code points from the beginning, if there is such a code point. Operates +-- | in constant space and in time linear to the given index. +-- | +-- | ```purescript +-- | >>> codePointAt 1 "𝐀𝐀𝐀𝐀" +-- | Just (CodePoint 0x1D400) -- represents "𝐀" +-- | -- compare to Data.String: +-- | >>> charAt 1 "𝐀𝐀𝐀𝐀" +-- | Just '�' +-- | ``` +-- | +codePointAt :: Int -> String -> Maybe CodePoint +codePointAt n _ | n < 0 = Nothing +codePointAt 0 "" = Nothing +codePointAt 0 s = Just (unsafeCodePointAt0 s) +codePointAt n s = _codePointAt codePointAtFallback Just Nothing unsafeCodePointAt0 n s + +foreign import _codePointAt + :: (Int -> String -> Maybe CodePoint) + -> (forall a. a -> Maybe a) + -> (forall a. 
Maybe a) + -> (String -> CodePoint) + -> Int + -> String + -> Maybe CodePoint + +codePointAtFallback :: Int -> String -> Maybe CodePoint +codePointAtFallback n s = case uncons s of + Just { head, tail } -> if n == 0 then Just head else codePointAtFallback (n - 1) tail + _ -> Nothing + + +-- | Returns the number of code points in the leading sequence of code points +-- | which all match the given predicate. Operates in constant space and in +-- | time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> countPrefix (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | 2 +-- | ``` +-- | +countPrefix :: (CodePoint -> Boolean) -> String -> Int +countPrefix = _countPrefix countFallback unsafeCodePointAt0 + +foreign import _countPrefix + :: ((CodePoint -> Boolean) -> String -> Int) + -> (String -> CodePoint) + -> (CodePoint -> Boolean) + -> String + -> Int + +countFallback :: (CodePoint -> Boolean) -> String -> Int +countFallback p s = countTail p s 0 + +countTail :: (CodePoint -> Boolean) -> String -> Int -> Int +countTail p s accum = case uncons s of + Just { head, tail } -> if p head then countTail p tail (accum + 1) else accum + _ -> accum + + +-- | Drops the given number of code points from the beginning of the string. If +-- | the string does not have that many code points, returns the empty string. +-- | Operates in constant space and in time linear to the given number. +-- | +-- | ```purescript +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "c" +-- | -- compared to Data.String: +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "b c" -- because "𝐀" occupies 2 code units +-- | ``` +-- | +drop :: Int -> String -> String +drop n s = String.drop (String.length (take n s)) s + + +-- | Drops the leading sequence of code points which all match the given +-- | predicate from the string. Operates in constant space and in time linear +-- | to the length of the string. 
+-- | +-- | ```purescript +-- | >>> dropWhile (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | " b c 𝐀" +-- | ``` +-- | +dropWhile :: (CodePoint -> Boolean) -> String -> String +dropWhile p s = drop (countPrefix p s) s + + +-- | Creates a string from an array of code points. Operates in space and time +-- | linear to the length of the array. +-- | +-- | ```purescript +-- | >>> codePointArray = toCodePointArray "c 𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400] +-- | >>> fromCodePointArray codePointArray +-- | "c 𝐀" +-- | ``` +-- | +fromCodePointArray :: Array CodePoint -> String +fromCodePointArray = _fromCodePointArray singletonFallback + +foreign import _fromCodePointArray + :: (CodePoint -> String) + -> Array CodePoint + -> String + +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the string. Returns Nothing when no matches are found. +-- | +-- | ```purescript +-- | >>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" +-- | Just 2 +-- | >>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +indexOf :: String.Pattern -> String -> Maybe Int +indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s + + +-- | Returns the number of code points preceding the first match of the given +-- | pattern in the string. Pattern matches preceding the given index will be +-- | ignored. Returns Nothing when no matches are found. +-- | +-- | ```purescript +-- | >>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀" +-- | Just 7 +-- | >>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +indexOf' :: String.Pattern -> Int -> String -> Maybe Int +indexOf' p i s = + let s' = drop i s in + (\k -> i + length (String.take k s')) <$> String.indexOf p s' + + +-- | Returns the number of code points preceding the last match of the given +-- | pattern in the string. Returns Nothing when no matches are found. 
+-- | +-- | ```purescript +-- | >>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" +-- | Just 7 +-- | >>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +lastIndexOf :: String.Pattern -> String -> Maybe Int +lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s + + +-- | Returns the number of code points preceding the last match of the given +-- | pattern in the string. Pattern matches following the given index will be +-- | ignored. Returns Nothing when no matches are found. +-- | +-- | ```purescript +-- | >>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀" +-- | Just 3 +-- | >>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀" +-- | Nothing +-- | ``` +-- | +lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int +lastIndexOf' p i s = + let i' = String.length (take i s) in + (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s + + +-- | Returns the number of code points in the string. Operates in constant +-- | space and in time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 8 +-- | -- compare to Data.String: +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 11 +-- | ``` +-- | +length :: String -> Int +length = Array.length <<< toCodePointArray + + +-- | Creates a string containing just the given code point. Operates in +-- | constant space and time.
+-- | +-- | ```purescript +-- | >>> map singleton (fromInt 0x1D400) +-- | Just "𝐀" +-- | ``` +-- | +singleton :: CodePoint -> String +singleton = _singleton singletonFallback + +foreign import _singleton + :: (CodePoint -> String) + -> CodePoint + -> String + +singletonFallback :: CodePoint -> String +singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp +singletonFallback (CodePoint cp) = + let lead = ((cp - 0x10000) / 0x400) + 0xD800 in + let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in + fromCharCode lead <> fromCharCode trail + + +-- | Splits a string into two substrings, where `before` contains the code +-- | points up to (but not including) the given index, and `after` contains the +-- | rest of the string, from that index on. +-- | +-- | ```purescript +-- | >>> splitAt 3 "b 𝐀𝐀 c 𝐀" +-- | { before: "b 𝐀", after: "𝐀 c 𝐀" } +-- | ``` +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.) +-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} +-- | ``` +splitAt :: Int -> String -> { before :: String, after :: String } +splitAt i s = + let before = take i s in + { before + -- inline drop i s to reuse the result of take i s + , after: String.drop (String.length before) s + } + +-- | Returns a string containing the given number of code points from the +-- | beginning of the given string. If the string does not have that many code +-- | points, returns the whole string. Operates in constant space and in time +-- | linear to the given number.
+-- | +-- | ```purescript +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b 𝐀" +-- | -- compare to Data.String: +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b �" +-- | ``` +-- | +take :: Int -> String -> String +take = _take takeFallback + +foreign import _take :: (Int -> String -> String) -> Int -> String -> String + +takeFallback :: Int -> String -> String +takeFallback n _ | n < 1 = "" +takeFallback n s = case uncons s of + Just { head, tail } -> singleton head <> takeFallback (n - 1) tail + _ -> s + + +-- | Returns a string containing the leading sequence of code points which all +-- | match the given predicate from the string. Operates in constant space and +-- | in time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> takeWhile (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | "𝐀𝐀" +-- | ``` +-- | +takeWhile :: (CodePoint -> Boolean) -> String -> String +takeWhile p s = take (countPrefix p s) s + + +-- | Creates an array of code points from a string. Operates in space and time +-- | linear to the length of the string. +-- | +-- | ```purescript +-- | >>> codePointArray = toCodePointArray "b 𝐀𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400] +-- | >>> map singleton codePointArray +-- | ["b", " ", "𝐀", "𝐀"] +-- | ``` +-- | +toCodePointArray :: String -> Array CodePoint +toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 + +foreign import _toCodePointArray + :: (String -> Array CodePoint) + -> (String -> CodePoint) + -> String + -> Array CodePoint + +toCodePointArrayFallback :: String -> Array CodePoint +toCodePointArrayFallback s = unfoldr unconsButWithTuple s + +unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) +unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s + + +-- | Returns a record with the first code point and the remaining code points +-- | of the string. Returns Nothing if the string is empty. Operates in +-- | constant space and time. 
+-- | +-- | ```purescript +-- | >>> uncons "𝐀𝐀 c 𝐀" +-- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" } +-- | >>> uncons "" +-- | Nothing +-- | ``` +-- | +uncons :: String -> Maybe { head :: CodePoint, tail :: String } +uncons s = case String.length s of + 0 -> Nothing + 1 -> Just { head: CodePoint (Unsafe.charCodeAt 0 s), tail: "" } + _ -> + let cu0 = Unsafe.charCodeAt 0 s in + let cu1 = Unsafe.charCodeAt 1 s in + if isLead cu0 && isTrail cu1 + then Just { head: unsurrogate cu0 cu1, tail: String.drop 2 s } + else Just { head: CodePoint cu0, tail: String.drop 1 s } diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index d51d180..f6c512b 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -3,16 +3,16 @@ -- | implementation (sequences of UTF-16 code units). For nearly all uses of -- | strings, these functions should be preferred over the ones in `Data.String`. module Data.String.CodePoints - ( module StringReExports - , CodePoint() + ( CodePoint , codePointAt , codePointFromInt , codePointToInt , codePointFromChar + , toCodePointArray + , fromCodePointArray , countPrefix , drop , dropWhile - , fromCodePointArray , indexOf , indexOf' , lastIndexOf @@ -22,7 +22,6 @@ module Data.String.CodePoints , splitAt , take , takeWhile - , toCodePointArray , uncons ) where @@ -32,20 +31,12 @@ import Data.Array as Array import Data.Char (toCharCode) import Data.Char as Char import Data.Int (hexadecimal, toStringAs) -import Data.Maybe (Maybe(Just, Nothing)) +import Data.Maybe (Maybe(..)) import Data.String as String import Data.String.Unsafe as Unsafe --- WARN: If a new function is added to Data.String, a version of that function --- should be exported from this module, which should be the same except that it --- should operate on the code point level rather than the code unit level. 
If --- the function's behaviour does not change based on whether we consider --- strings as sequences of code points or code units, it can simply be --- re-exported from Data.String. -import Data.String (Pattern(..), Replacement(..), charAt, charCodeAt, contains, fromCharArray, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toChar, toCharArray, toLower, toUpper, trim) as StringReExports -import Data.Tuple (Tuple(Tuple)) +import Data.Tuple (Tuple(..)) import Data.Unfoldable (unfoldr) - -- | CodePoint is an Int bounded between 0 and 0x10FFFF, corresponding to -- | Unicode code points. newtype CodePoint = CodePoint Int diff --git a/src/Data/String/CodeUnits.js b/src/Data/String/CodeUnits.js new file mode 100644 index 0000000..425bb47 --- /dev/null +++ b/src/Data/String/CodeUnits.js @@ -0,0 +1,119 @@ +"use strict"; + +exports.fromCharArray = function (a) { + return a.join(""); +}; + +exports.singleton = function (c) { + return c; +}; + +exports._charAt = function (just) { + return function (nothing) { + return function (i) { + return function (s) { + return i >= 0 && i < s.length ? just(s.charAt(i)) : nothing; + }; + }; + }; +}; + +exports._toChar = function (just) { + return function (nothing) { + return function (s) { + return s.length === 1 ? just(s) : nothing; + }; + }; +}; + +exports.toCharArray = function (s) { + return s.split(""); +}; + +exports.length = function (s) { + return s.length; +}; + +exports.countPrefix = function (p) { + return function (s) { + var i = 0; + while (i < s.length && p(s.charAt(i))) i++; + return i; + }; +}; + +exports._indexOf = function (just) { + return function (nothing) { + return function (x) { + return function (s) { + var i = s.indexOf(x); + return i === -1 ? 
nothing : just(i); + }; + }; + }; +}; + +exports["_indexOf'"] = function (just) { + return function (nothing) { + return function (x) { + return function (startAt) { + return function (s) { + if (startAt < 0 || startAt > s.length) return nothing; + var i = s.indexOf(x, startAt); + return i === -1 ? nothing : just(i); + }; + }; + }; + }; +}; + +exports._lastIndexOf = function (just) { + return function (nothing) { + return function (x) { + return function (s) { + var i = s.lastIndexOf(x); + return i === -1 ? nothing : just(i); + }; + }; + }; +}; + +exports["_lastIndexOf'"] = function (just) { + return function (nothing) { + return function (x) { + return function (startAt) { + return function (s) { + if (startAt < 0 || startAt > s.length) return nothing; + var i = s.lastIndexOf(x, startAt); + return i === -1 ? nothing : just(i); + }; + }; + }; + }; +}; + +exports.take = function (n) { + return function (s) { + return s.substr(0, n); + }; +}; + +exports.drop = function (n) { + return function (s) { + return s.substring(n); + }; +}; + +exports._slice = function (b) { + return function (e) { + return function (s) { + return s.slice(b,e); + }; + }; +}; + +exports.splitAt = function (i) { + return function (s) { + return { before: s.substring(0, i), after: s.substring(i) }; + }; +}; diff --git a/src/Data/String/CodeUnits.purs b/src/Data/String/CodeUnits.purs new file mode 100644 index 0000000..3c5ac9c --- /dev/null +++ b/src/Data/String/CodeUnits.purs @@ -0,0 +1,294 @@ +-- | Wraps the functions of Javascript's `String` object. +-- | A String represents a sequence of characters. +-- | For details of the underlying implementation, see [String Reference at MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String). 
+module Data.String.CodeUnits + ( singleton + , fromCharArray + , charAt + , toChar + , toCharArray + , uncons + , length + , countPrefix + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , take + , takeRight + , takeWhile + , drop + , dropRight + , dropWhile + , slice + , splitAt + ) where + +import Prelude + +import Data.Maybe (Maybe(..)) +import Data.String.Pattern (Pattern) +import Data.String.Unsafe as U + +-- | Returns a string of length `1` containing the given character. +-- | +-- | ```purescript +-- | singleton 'l' == "l" +-- | ``` +-- | +foreign import singleton :: Char -> String + +-- | Converts an array of characters into a string. +-- | +-- | ```purescript +-- | fromCharArray ['H', 'e', 'l', 'l', 'o'] == "Hello" +-- | ``` +foreign import fromCharArray :: Array Char -> String + +-- | Returns the character at the given index, if the index is within bounds. +-- | +-- | ```purescript +-- | charAt 2 "Hello" == Just 'l' +-- | charAt 10 "Hello" == Nothing +-- | ``` +-- | +charAt :: Int -> String -> Maybe Char +charAt = _charAt Just Nothing + +foreign import _charAt + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Int + -> String + -> Maybe Char + +-- | Converts the string to a character, if the length of the string is +-- | exactly `1`. +-- | +-- | ```purescript +-- | toChar "l" == Just 'l' +-- | toChar "Hi" == Nothing -- since length is not 1 +-- | ``` +toChar :: String -> Maybe Char +toChar = _toChar Just Nothing + +foreign import _toChar + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> String + -> Maybe Char + +-- | Converts the string into an array of characters. +-- | +-- | ```purescript +-- | toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n'] +-- | ``` +foreign import toCharArray :: String -> Array Char + +-- | Returns the first character and the rest of the string, +-- | if the string is not empty. 
+-- | +-- | ```purescript +-- | uncons "" == Nothing +-- | uncons "Hello World" == Just { head: 'H', tail: "ello World" } +-- | ``` +-- | +uncons :: String -> Maybe { head :: Char, tail :: String } +uncons "" = Nothing +uncons s = Just { head: U.charAt zero s, tail: drop one s } + +-- | Returns the number of characters the string is composed of. +-- | +-- | ```purescript +-- | length "Hello World" == 11 +-- | ``` +-- | +foreign import length :: String -> Int + +-- | Returns the number of contiguous characters at the beginning +-- | of the string for which the predicate holds. +-- | +-- | ```purescript +-- | countPrefix (_ /= ' ') "Hello World" == 5 -- since length "Hello" == 5 +-- | ``` +-- | +foreign import countPrefix :: (Char -> Boolean) -> String -> Int + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | indexOf (Pattern "c") "abcdc" == Just 2 +-- | indexOf (Pattern "c") "aaa" == Nothing +-- | ``` +-- | +indexOf :: Pattern -> String -> Maybe Int +indexOf = _indexOf Just Nothing + +foreign import _indexOf + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> String + -> Maybe Int + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string, starting at the specified index. Returns `Nothing` if there is +-- | no match. +-- | +-- | ```purescript +-- | indexOf' (Pattern "a") 2 "ababa" == Just 2 +-- | indexOf' (Pattern "a") 3 "ababa" == Just 4 +-- | ``` +-- | +indexOf' :: Pattern -> Int -> String -> Maybe Int +indexOf' = _indexOf' Just Nothing + +foreign import _indexOf' + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> Int + -> String + -> Maybe Int + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. 
+-- | +-- | ```purescript +-- | lastIndexOf (Pattern "c") "abcdc" == Just 4 +-- | lastIndexOf (Pattern "c") "aaa" == Nothing +-- | ``` +-- | +lastIndexOf :: Pattern -> String -> Maybe Int +lastIndexOf = _lastIndexOf Just Nothing + +foreign import _lastIndexOf + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> String + -> Maybe Int + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string, starting at the specified index +-- | and searching backwards towards the beginning of the string. +-- | Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf' (Pattern "a") 1 "ababa" == Just 0 +-- | lastIndexOf' (Pattern "a") 3 "ababa" == Just 2 +-- | lastIndexOf' (Pattern "a") 4 "ababa" == Just 4 +-- | ``` +-- | +lastIndexOf' :: Pattern -> Int -> String -> Maybe Int +lastIndexOf' = _lastIndexOf' Just Nothing + +foreign import _lastIndexOf' + :: (forall a. a -> Maybe a) + -> (forall a. Maybe a) + -> Pattern + -> Int + -> String + -> Maybe Int + +-- | Returns the first `n` characters of the string. +-- | +-- | ```purescript +-- | take 5 "Hello World" == "Hello" +-- | ``` +-- | +foreign import take :: Int -> String -> String + +-- | Returns the last `n` characters of the string. +-- | +-- | ```purescript +-- | takeRight 5 "Hello World" == "World" +-- | ``` +-- | +takeRight :: Int -> String -> String +takeRight i s = drop (length s - i) s + +-- | Returns the longest prefix (possibly empty) of characters that satisfy +-- | the predicate. +-- | +-- | ```purescript +-- | takeWhile (_ /= ':') "http://purescript.org" == "http" +-- | ``` +-- | +takeWhile :: (Char -> Boolean) -> String -> String +takeWhile p s = take (countPrefix p s) s + +-- | Returns the string without the first `n` characters. +-- | +-- | ```purescript +-- | drop 6 "Hello World" == "World" +-- | ``` +-- | +foreign import drop :: Int -> String -> String + +-- | Returns the string without the last `n` characters. 
+-- | +-- | ```purescript +-- | dropRight 6 "Hello World" == "Hello" +-- | ``` +-- | +dropRight :: Int -> String -> String +dropRight i s = take (length s - i) s + +-- | Returns the suffix remaining after `takeWhile`. +-- | +-- | ```purescript +-- | dropWhile (_ /= '.') "Test.purs" == ".purs" +-- | ``` +-- | +dropWhile :: (Char -> Boolean) -> String -> String +dropWhile p s = drop (countPrefix p s) s + +-- | Returns the substring at indices `[begin, end)`. +-- | If either index is negative, it is normalised to `length s + index`, +-- | where `s` is the input string. `Nothing` is returned if either +-- | index is out of bounds or if `begin > end` after normalisation. +-- | +-- | ```purescript +-- | slice 0 0 "purescript" == Just "" +-- | slice 0 1 "purescript" == Just "p" +-- | slice 3 6 "purescript" == Just "esc" +-- | slice (-4) (-1) "purescript" == Just "rip" +-- | slice (-4) 3 "purescript" == Nothing +-- | ``` +slice :: Int -> Int -> String -> Maybe String +slice b e s = if b' < 0 || b' >= l || + e' < 0 || e' >= l || + b' > e' + then Nothing + else Just (_slice b e s) + where + l = length s + norm x | x < 0 = l + x + | otherwise = x + b' = norm b + e' = norm e + +foreign import _slice :: Int -> Int -> String -> String + +-- | Splits a string into two substrings, where `before` contains the +-- | characters up to (but not including) the given index, and `after` contains +-- | the rest of the string, from that index on. +-- | +-- | ```purescript +-- | splitAt 2 "Hello World" == { before: "He", after: "llo World"} +-- | splitAt 10 "Hi" == { before: "Hi", after: ""} +-- | ``` +-- | +-- | Thus the length of `(splitAt i s).before` will equal either `i` or +-- | `length s`, if that is shorter. (Or if `i` is negative the length will be +-- | 0.)
+-- | +-- | In code: +-- | ```purescript +-- | length (splitAt i s).before == min (max i 0) (length s) +-- | (splitAt i s).before <> (splitAt i s).after == s +-- | splitAt i s == {before: take i s, after: drop i s} +-- | ``` +foreign import splitAt :: Int -> String -> { before :: String, after :: String } diff --git a/src/Data/String/Gen.purs b/src/Data/String/Gen.purs index 08c902a..845b5e8 100644 --- a/src/Data/String/Gen.purs +++ b/src/Data/String/Gen.purs @@ -5,13 +5,13 @@ import Prelude import Control.Monad.Gen (class MonadGen, chooseInt, unfoldable, sized, resize) import Control.Monad.Rec.Class (class MonadRec) import Data.Char.Gen as CG -import Data.String as S +import Data.String.CodeUnits as SCU -- | Generates a string using the specified character generator. genString :: forall m. MonadRec m => MonadGen m => m Char -> m String genString genChar = sized \size -> do newSize <- chooseInt 1 (max 1 size) - resize (const newSize) $ S.fromCharArray <$> unfoldable genChar + resize (const newSize) $ SCU.fromCharArray <$> unfoldable genChar -- | Generates a string using characters from the Unicode basic multilingual -- | plain. @@ -41,4 +41,3 @@ genAlphaLowercaseString = genString CG.genAlphaLowercase -- | Generates a string using uppercase characters from the basic Latin alphabet. genAlphaUppercaseString :: forall m. MonadRec m => MonadGen m => m String genAlphaUppercaseString = genString CG.genAlphaUppercase - diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs deleted file mode 100644 index a6f50e2..0000000 --- a/src/Data/String/NonEmpty.purs +++ /dev/null @@ -1,499 +0,0 @@ --- | Non-empty strings. --- | --- | Please note that the examples in this documentation use a notation like --- | `NonEmptyString "..."` for demonstration purposes, `NonEmptyString` cannot --- | be created directly like that, as we can't prove the string is non-empty to --- | the compiler at compile-time. 
-module Data.String.NonEmpty - ( NonEmptyString - , NonEmptyReplacement(..) - , fromString - , unsafeFromString - , fromCharArray - , fromNonEmptyCharArray - , singleton - , cons - , snoc - , fromFoldable1 - , toString - , toCharArray - , toNonEmptyCharArray - , charAt - , charCodeAt - , toChar - , appendString - , prependString - , contains - , indexOf - , indexOf' - , lastIndexOf - , lastIndexOf' - , uncons - , length - , localeCompare - , replace - , replaceAll - , take - , takeRight - , takeWhile - , drop - , dropRight - , dropWhile - , stripPrefix - , stripSuffix - , countPrefix - , splitAt - , toLower - , toUpper - , trim - , joinWith - , join1With - , joinWith1 - , module Data.String - ) where - -import Prelude - -import Data.Array.NonEmpty (NonEmptyArray) -import Data.Array.NonEmpty as NEA -import Data.Foldable (class Foldable) -import Data.Foldable as F -import Data.Maybe (Maybe(..), fromJust) -import Data.Semigroup.Foldable (class Foldable1) -import Data.Semigroup.Foldable as F1 -import Data.String (Pattern(..)) -import Data.String as String -import Data.String.Unsafe as U -import Partial.Unsafe (unsafePartial) -import Unsafe.Coerce (unsafeCoerce) - --- | A string that is known not to be empty. -newtype NonEmptyString = NonEmptyString String - -derive newtype instance eqNonEmptyString ∷ Eq NonEmptyString -derive newtype instance ordNonEmptyString ∷ Ord NonEmptyString -derive newtype instance semigroupNonEmptyString ∷ Semigroup NonEmptyString - -instance showNonEmptyString :: Show NonEmptyString where - show (NonEmptyString s) = "(NonEmptyString.unsafeFromString " <> show s <> ")" - --- | A newtype used in cases to specify a non-empty replacement for a pattern. 
-newtype NonEmptyReplacement = NonEmptyReplacement NonEmptyString - -derive newtype instance eqNonEmptyReplacement :: Eq NonEmptyReplacement -derive newtype instance ordNonEmptyReplacement :: Ord NonEmptyReplacement -derive newtype instance semigroupNonEmptyReplacement ∷ Semigroup NonEmptyReplacement - -instance showNonEmptyReplacement :: Show NonEmptyReplacement where - show (NonEmptyReplacement s) = "(NonEmptyReplacement " <> show s <> ")" - --- | Creates a `NonEmptyString` from a `String`, returning `Nothing` if the --- | input is empty. --- | --- | ```purescript --- | fromString "" = Nothing --- | fromString "hello" = Just (NonEmptyString "hello") --- | ``` -fromString :: String -> Maybe NonEmptyString -fromString = case _ of - "" -> Nothing - s -> Just (NonEmptyString s) - --- | A partial version of `fromString`. -unsafeFromString :: Partial => String -> NonEmptyString -unsafeFromString = fromJust <<< fromString - --- | Creates a `NonEmptyString` from a character array `String`, returning --- | `Nothing` if the input is empty. --- | --- | ```purescript --- | fromCharArray [] = Nothing --- | fromCharArray ['a', 'b', 'c'] = Just (NonEmptyString "abc") --- | ``` -fromCharArray :: Array Char -> Maybe NonEmptyString -fromCharArray = case _ of - [] -> Nothing - cs -> Just (NonEmptyString (String.fromCharArray cs)) - -fromNonEmptyCharArray :: NonEmptyArray Char -> NonEmptyString -fromNonEmptyCharArray = unsafePartial fromJust <<< fromCharArray <<< NEA.toArray - --- | Creates a `NonEmptyString` from a character. -singleton :: Char -> NonEmptyString -singleton = NonEmptyString <<< String.singleton - --- | Creates a `NonEmptyString` from a string by prepending a character. --- | --- | ```purescript --- | cons 'a' "bc" = NonEmptyString "abc" --- | cons 'a' "" = NonEmptyString "a" --- | ``` -cons :: Char -> String -> NonEmptyString -cons c s = NonEmptyString (String.singleton c <> s) - --- | Creates a `NonEmptyString` from a string by appending a character. 
--- | --- | ```purescript --- | snoc 'c' "ab" = NonEmptyString "abc" --- | snoc 'a' "" = NonEmptyString "a" --- | ``` -snoc :: Char -> String -> NonEmptyString -snoc c s = NonEmptyString (s <> String.singleton c) - --- | Creates a `NonEmptyString` from a `Foldable1` container carrying --- | characters. -fromFoldable1 :: forall f. Foldable1 f => f Char -> NonEmptyString -fromFoldable1 = F1.fold1 <<< coe - where - coe ∷ f Char -> f NonEmptyString - coe = unsafeCoerce - --- | Converts a `NonEmptyString` back into a standard `String`. -toString :: NonEmptyString -> String -toString (NonEmptyString s) = s - --- | Returns the character at the given index, if the index is within bounds. --- | --- | ```purescript --- | charAt 2 (NonEmptyString "Hello") == Just 'l' --- | charAt 10 (NonEmptyString "Hello") == Nothing --- | ``` -charAt :: Int -> NonEmptyString -> Maybe Char -charAt = liftS <<< String.charAt - --- | Returns the numeric Unicode value of the character at the given index, --- | if the index is within bounds. --- | --- | ```purescript --- | charCodeAt 2 (NonEmptyString "5 €") == Just 0x20AC --- | charCodeAt 10 (NonEmptyString "5 €") == Nothing --- | ``` -charCodeAt :: Int -> NonEmptyString -> Maybe Int -charCodeAt = liftS <<< String.charCodeAt - --- | Converts the `NonEmptyString` to a character, if the length of the string --- | is exactly `1`. --- | --- | ```purescript --- | toChar "H" == Just 'H' --- | toChar "Hi" == Nothing --- | ``` -toChar :: NonEmptyString -> Maybe Char -toChar (NonEmptyString s) = String.toChar s - --- | Converts the `NonEmptyString` into an array of characters. --- | --- | ```purescript --- | toCharArray (NonEmptyString "Hello☺\n") == ['H','e','l','l','o','☺','\n'] --- | ``` -toCharArray :: NonEmptyString -> Array Char -toCharArray (NonEmptyString s) = String.toCharArray s - --- | Converts the `NonEmptyString` into a non-empty array of characters. 
-toNonEmptyCharArray :: NonEmptyString -> NonEmptyArray Char -toNonEmptyCharArray = unsafePartial fromJust <<< NEA.fromArray <<< toCharArray - --- | Appends a string to this non-empty string. Since one of the strings is --- | non-empty we know the result will be too. --- | --- | ```purescript --- | appendString (NonEmptyString "Hello") " world" == NonEmptyString "Hello world" --- | appendString (NonEmptyString "Hello") "" == NonEmptyString "Hello" --- | ``` -appendString :: NonEmptyString -> String -> NonEmptyString -appendString (NonEmptyString s1) s2 = NonEmptyString (s1 <> s2) - --- | Prepends a string to this non-empty string. Since one of the strings is --- | non-empty we know the result will be too. --- | --- | ```purescript --- | prependString "be" (NonEmptyString "fore") == NonEmptyString "before" --- | prependString "" (NonEmptyString "fore") == NonEmptyString "fore" --- | ``` -prependString :: String -> NonEmptyString -> NonEmptyString -prependString s1 (NonEmptyString s2) = NonEmptyString (s1 <> s2) - --- | Returns the first character and the rest of the string. --- | --- | ```purescript --- | uncons "a" == { head: 'a', tail: Nothing } --- | uncons "Hello World" == { head: 'H', tail: Just (NonEmptyString "ello World") } --- | ``` -uncons :: NonEmptyString -> { head :: Char, tail :: Maybe NonEmptyString } -uncons (NonEmptyString s) = - { head: U.charAt 0 s - , tail: fromString (String.drop 1 s) - } - --- | Returns the longest prefix of characters that satisfy the predicate. --- | `Nothing` is returned if there is no matching prefix. --- | --- | ```purescript --- | takeWhile (_ /= ':') (NonEmptyString "http://purescript.org") == Just (NonEmptyString "http") --- | takeWhile (_ == 'a') (NonEmptyString "xyz") == Nothing --- | ``` -takeWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString -takeWhile f = fromString <<< liftS (String.takeWhile f) - --- | Returns the suffix remaining after `takeWhile`. 
--- | --- | ```purescript --- | dropWhile (_ /= '.') (NonEmptyString "Test.purs") == Just (NonEmptyString ".purs") --- | ``` -dropWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString -dropWhile f = fromString <<< liftS (String.dropWhile f) - --- | If the string starts with the given prefix, return the portion of the --- | string left after removing it. If the prefix does not match or there is no --- | remainder, the result will be `Nothing`. --- | --- | ```purescript --- | stripPrefix (Pattern "http:") (NonEmptyString "http://purescript.org") == Just (NonEmptyString "//purescript.org") --- | stripPrefix (Pattern "http:") (NonEmptyString "https://purescript.org") == Nothing --- | stripPrefix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing --- | ``` -stripPrefix :: Pattern -> NonEmptyString -> Maybe NonEmptyString -stripPrefix pat = fromString <=< liftS (String.stripPrefix pat) - --- | If the string ends with the given suffix, return the portion of the --- | string left after removing it. If the suffix does not match or there is no --- | remainder, the result will be `Nothing`. --- | --- | ```purescript --- | stripSuffix (Pattern ".exe") (NonEmptyString "purs.exe") == Just (NonEmptyString "purs") --- | stripSuffix (Pattern ".exe") (NonEmptyString "purs") == Nothing --- | stripSuffix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing --- | ``` -stripSuffix :: Pattern -> NonEmptyString -> Maybe NonEmptyString -stripSuffix pat = fromString <=< liftS (String.stripSuffix pat) - --- | Checks whether the pattern appears in the given string. --- | --- | ```purescript --- | contains (Pattern "needle") (NonEmptyString "haystack with needle") == true --- | contains (Pattern "needle") (NonEmptyString "haystack") == false --- | ``` -contains :: Pattern -> NonEmptyString -> Boolean -contains = liftS <<< String.contains - --- | Returns the index of the first occurrence of the pattern in the --- | given string. Returns `Nothing` if there is no match. 
--- | --- | ```purescript --- | indexOf (Pattern "c") (NonEmptyString "abcdc") == Just 2 --- | indexOf (Pattern "c") (NonEmptyString "aaa") == Nothing --- | ``` -indexOf :: Pattern -> NonEmptyString -> Maybe Int -indexOf = liftS <<< String.indexOf - --- | Returns the index of the first occurrence of the pattern in the --- | given string, starting at the specified index. Returns `Nothing` if there is --- | no match. --- | --- | ```purescript --- | indexOf' (Pattern "a") 2 (NonEmptyString "ababa") == Just 2 --- | indexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 4 --- | ``` -indexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int -indexOf' pat = liftS <<< String.indexOf' pat - --- | Returns the index of the last occurrence of the pattern in the --- | given string. Returns `Nothing` if there is no match. --- | --- | ```purescript --- | lastIndexOf (Pattern "c") (NonEmptyString "abcdc") == Just 4 --- | lastIndexOf (Pattern "c") (NonEmptyString "aaa") == Nothing --- | ``` -lastIndexOf :: Pattern -> NonEmptyString -> Maybe Int -lastIndexOf = liftS <<< String.lastIndexOf - --- | Returns the index of the last occurrence of the pattern in the --- | given string, starting at the specified index --- | and searching backwards towards the beginning of the string. --- | Returns `Nothing` if there is no match. --- | --- | ```purescript --- | lastIndexOf' (Pattern "a") 1 (NonEmptyString "ababa") == Just 0 --- | lastIndexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 2 --- | lastIndexOf' (Pattern "a") 4 (NonEmptyString "ababa") == Just 4 --- | ``` -lastIndexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int -lastIndexOf' pat = liftS <<< String.lastIndexOf' pat - --- | Returns the number of characters the string is composed of. --- | --- | ```purescript --- | length (NonEmptyString "Hello World") == 11 --- | ``` -length :: NonEmptyString -> Int -length (NonEmptyString s) = String.length s - --- | Compare two strings in a locale-aware fashion. 
This is in contrast to --- | the `Ord` instance on `String` which treats strings as arrays of code --- | units: --- | --- | ```purescript --- | NonEmptyString "ä" `localeCompare` NonEmptyString "b" == LT --- | NonEmptyString "ä" `compare` NonEmptyString "b" == GT --- | ``` -localeCompare :: NonEmptyString -> NonEmptyString -> Ordering -localeCompare (NonEmptyString a) (NonEmptyString b) = String.localeCompare a b - --- | Replaces the first occurence of the pattern with the replacement string. --- | --- | ```purescript --- | replace (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b <= c" --- | ``` -replace :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString -replace pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = - NonEmptyString (String.replace pat (String.Replacement rep) s) - --- | Replaces all occurences of the pattern with the replacement string. --- | --- | ```purescript --- | replaceAll (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b ≤ c" --- | ``` -replaceAll :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString -replaceAll pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = - NonEmptyString (String.replaceAll pat (String.Replacement rep) s) - --- | Returns the first `n` characters of the string. Returns `Nothing` if `n` is --- | less than 1. --- | --- | ```purescript --- | take 5 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") --- | take 0 (NonEmptyString "Hello World") == Nothing --- | ``` -take :: Int -> NonEmptyString -> Maybe NonEmptyString -take i (NonEmptyString s) - | i < 1 = Nothing - | otherwise = Just (NonEmptyString (String.take i s)) - --- | Returns the last `n` characters of the string. Returns `Nothing` if `n` is --- | less than 1. 
--- | --- | ```purescript --- | take 5 (NonEmptyString "Hello World") == Just (NonEmptyString "World") --- | take 0 (NonEmptyString "Hello World") == Nothing --- | ``` -takeRight :: Int -> NonEmptyString -> Maybe NonEmptyString -takeRight i (NonEmptyString s) - | i < 1 = Nothing - | otherwise = Just (NonEmptyString (String.takeRight i s)) - --- | Returns the string without the first `n` characters. Returns `Nothing` if --- | more characters are dropped than the string is long. --- | --- | ```purescript --- | drop 6 (NonEmptyString "Hello World") == Just (NonEmptyString "World") --- | drop 20 (NonEmptyString "Hello World") == Nothing --- | ``` -drop :: Int -> NonEmptyString -> Maybe NonEmptyString -drop i (NonEmptyString s) - | i >= String.length s = Nothing - | otherwise = Just (NonEmptyString (String.drop i s)) - --- | Returns the string without the last `n` characters. Returns `Nothing` if --- | more characters are dropped than the string is long. --- | --- | ```purescript --- | dropRight 6 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") --- | dropRight 20 (NonEmptyString "Hello World") == Nothing --- | ``` -dropRight :: Int -> NonEmptyString -> Maybe NonEmptyString -dropRight i (NonEmptyString s) - | i >= String.length s = Nothing - | otherwise = Just (NonEmptyString (String.dropRight i s)) - --- | Returns the number of contiguous characters at the beginning of the string --- | for which the predicate holds. --- | --- | ```purescript --- | countPrefix (_ /= 'o') (NonEmptyString "Hello World") == 4 --- | ``` -countPrefix :: (Char -> Boolean) -> NonEmptyString -> Int -countPrefix = liftS <<< String.countPrefix - --- | Returns the substrings of a split at the given index, if the index is --- | within bounds. 
--- | --- | ```purescript --- | splitAt 2 (NonEmptyString "Hello World") == Just { before: Just (NonEmptyString "He"), after: Just (NonEmptyString "llo World") } --- | splitAt 10 (NonEmptyString "Hi") == Nothing --- | ``` -splitAt - :: Int - -> NonEmptyString - -> { before :: Maybe NonEmptyString, after :: Maybe NonEmptyString } -splitAt i (NonEmptyString s) = - case String.splitAt i s of - { before, after } -> { before: fromString before, after: fromString after } - --- | Returns the argument converted to lowercase. --- | --- | ```purescript --- | toLower (NonEmptyString "hElLo") == NonEmptyString "hello" --- | ``` -toLower :: NonEmptyString -> NonEmptyString -toLower (NonEmptyString s) = NonEmptyString (String.toLower s) - --- | Returns the argument converted to uppercase. --- | --- | ```purescript --- | toUpper (NonEmptyString "Hello") == NonEmptyString "HELLO" --- | ``` -toUpper :: NonEmptyString -> NonEmptyString -toUpper (NonEmptyString s) = NonEmptyString (String.toUpper s) - --- | Removes whitespace from the beginning and end of a string, including --- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) --- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). --- | If the string is entirely made up of whitespace the result will be Nothing. --- | --- | ```purescript --- | trim (NonEmptyString " Hello \n World\n\t ") == Just (NonEmptyString "Hello \n World") --- | trim (NonEmptyString " \n") == Nothing --- | ``` -trim :: NonEmptyString -> Maybe NonEmptyString -trim (NonEmptyString s) = fromString (String.trim s) - --- | Joins the strings in a container together as a new string, inserting the --- | first argument as separator between them. The result is not guaranteed to --- | be non-empty. --- | --- | ```purescript --- | joinWith ", " [NonEmptyString "apple", NonEmptyString "banana"] == "apple, banana" --- | joinWith ", " [] == "" --- | ``` -joinWith :: forall f. 
Foldable f => String -> f NonEmptyString -> String -joinWith splice = F.intercalate splice <<< coe - where - coe :: f NonEmptyString -> f String - coe = unsafeCoerce - --- | Joins non-empty strings in a non-empty container together as a new --- | non-empty string, inserting a possibly empty string as separator between --- | them. The result is guaranteed to be non-empty. --- | --- | ```purescript --- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` --- | join1With ", " [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "apple, banana" --- | join1With "" [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "applebanana" --- | ``` -join1With :: forall f. Foldable1 f => String -> f NonEmptyString -> NonEmptyString -join1With splice = NonEmptyString <<< joinWith splice - --- | Joins possibly empty strings in a non-empty container together as a new --- | non-empty string, inserting a non-empty string as a separator between them. --- | The result is guaranteed to be non-empty. --- | --- | ```purescript --- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` --- | joinWith1 (NonEmptyString ", ") ["apple", "banana"] == NonEmptyString "apple, banana" --- | joinWith1 (NonEmptyString "/") ["a", "b", "", "c", ""] == NonEmptyString "a/b//c/" --- | ``` -joinWith1 :: forall f. Foldable1 f => NonEmptyString -> f String -> NonEmptyString -joinWith1 (NonEmptyString splice) = NonEmptyString <<< F.intercalate splice - -liftS :: forall r. (String -> r) -> NonEmptyString -> r -liftS f (NonEmptyString s) = f s diff --git a/src/Data/String/Pattern.purs b/src/Data/String/Pattern.purs new file mode 100644 index 0000000..e0aea96 --- /dev/null +++ b/src/Data/String/Pattern.purs @@ -0,0 +1,33 @@ +module Data.String.Pattern where + +import Prelude + +import Data.Newtype (class Newtype) + +-- | A newtype used in cases where there is a string to be matched. 
+-- | +-- | ```purescript +-- | pursPattern = Pattern ".purs" +-- | --can be used like this: +-- | contains pursPattern "Test.purs" +-- | == true +-- | ``` +-- | +newtype Pattern = Pattern String + +derive instance eqPattern :: Eq Pattern +derive instance ordPattern :: Ord Pattern +derive instance newtypePattern :: Newtype Pattern _ + +instance showPattern :: Show Pattern where + show (Pattern s) = "(Pattern " <> show s <> ")" + +-- | A newtype used in cases to specify a replacement for a pattern. +newtype Replacement = Replacement String + +derive instance eqReplacement :: Eq Replacement +derive instance ordReplacement :: Ord Replacement +derive instance newtypeReplacement :: Newtype Replacement _ + +instance showReplacement :: Show Replacement where + show (Replacement s) = "(Replacement " <> show s <> ")" From d723c305bdb20fcbfaf2c2bf16adee958b3df0b6 Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Mon, 21 May 2018 15:29:20 +0100 Subject: [PATCH 12/18] Major re-arrange for CodeUnit/CodePoint split --- bower.json | 7 +- src/Data/Char.js | 8 - src/Data/Char.purs | 10 +- src/Data/Char/Gen.purs | 14 +- src/Data/String.purs | 140 +--- ...sitiveString.purs => CaseInsensitive.purs} | 0 src/Data/String/CodePoint.purs | 446 ---------- src/Data/String/CodePoints.purs | 441 +++++----- src/Data/String/CodeUnits.js | 8 +- src/Data/String/CodeUnits.purs | 77 +- src/Data/{String.js => String/Common.js} | 0 src/Data/String/Common.purs | 96 +++ src/Data/String/NonEmpty.purs | 241 ++++++ src/Data/String/NonEmpty/CaseInsensitive.purs | 22 + src/Data/String/NonEmpty/CodePoints.purs | 136 +++ src/Data/String/NonEmpty/CodeUnits.purs | 298 +++++++ src/Data/String/Unsafe.js | 7 - src/Data/String/Unsafe.purs | 6 - test/Test/Data/Char.purs | 35 +- test/Test/Data/String.purs | 299 +++---- test/Test/Data/String/CaseInsensitive.purs | 18 +- test/Test/Data/String/CodePoints.purs | 791 ++++++++++++++---- test/Test/Data/String/CodeUnits.purs | 456 ++++++++++ 
test/Test/Data/String/NonEmpty.purs | 403 ++++----- test/Test/Data/String/NonEmpty/CodeUnits.purs | 450 ++++++++++ test/Test/Data/String/Unsafe.purs | 27 +- test/Test/Main.purs | 6 + 27 files changed, 2948 insertions(+), 1494 deletions(-) rename src/Data/String/{CaseInsensitiveString.purs => CaseInsensitive.purs} (100%) delete mode 100644 src/Data/String/CodePoint.purs rename src/Data/{String.js => String/Common.js} (100%) create mode 100644 src/Data/String/Common.purs create mode 100644 src/Data/String/NonEmpty.purs create mode 100644 src/Data/String/NonEmpty/CaseInsensitive.purs create mode 100644 src/Data/String/NonEmpty/CodePoints.purs create mode 100644 src/Data/String/NonEmpty/CodeUnits.purs create mode 100644 test/Test/Data/String/CodeUnits.purs create mode 100644 test/Test/Data/String/NonEmpty/CodeUnits.purs diff --git a/bower.json b/bower.json index 225c8bc..db0c9b5 100644 --- a/bower.json +++ b/bower.json @@ -17,13 +17,14 @@ "package.json" ], "dependencies": { + "purescript-arrays": "#compiler/0.12", "purescript-either": "#compiler/0.12", + "purescript-enums": "#compiler/0.12", "purescript-gen": "#compiler/0.12", + "purescript-integers": "#compiler/0.12", "purescript-maybe": "#compiler/0.12", "purescript-partial": "#compiler/0.12", - "purescript-unfoldable": "#compiler/0.12", - "purescript-arrays": "#compiler/0.12", - "purescript-integers": "#compiler/0.12" + "purescript-unfoldable": "#compiler/0.12" }, "devDependencies": { "purescript-assert": "#compiler/0.12", diff --git a/src/Data/Char.js b/src/Data/Char.js index 15f12b2..d396533 100644 --- a/src/Data/Char.js +++ b/src/Data/Char.js @@ -1,13 +1,5 @@ "use strict"; -exports.toCharCode = function (c) { - return c.charCodeAt(0); -}; - -exports.fromCharCode = function (c) { - return String.fromCharCode(c); -}; - exports.toLower = function (c) { return c.toLowerCase(); }; diff --git a/src/Data/Char.purs b/src/Data/Char.purs index ffad899..169abb0 100644 --- a/src/Data/Char.purs +++ b/src/Data/Char.purs @@ 
-1,17 +1,9 @@ -- | A type and functions for single characters. module Data.Char - ( fromCharCode - , toCharCode - , toLower + ( toLower , toUpper ) where --- | Returns the numeric Unicode value of the character. -foreign import toCharCode :: Char -> Int - --- | Constructs a character from the given Unicode numeric value. -foreign import fromCharCode :: Int -> Char - -- | Converts a character to lowercase. foreign import toLower :: Char -> Char diff --git a/src/Data/Char/Gen.purs b/src/Data/Char/Gen.purs index 313cab3..838ff29 100644 --- a/src/Data/Char/Gen.purs +++ b/src/Data/Char/Gen.purs @@ -3,24 +3,24 @@ module Data.Char.Gen where import Prelude import Control.Monad.Gen (class MonadGen, chooseInt, oneOf) -import Data.Char as C +import Data.Enum (toEnumWithDefaults) import Data.NonEmpty ((:|)) -- | Generates a character of the Unicode basic multilingual plane. genUnicodeChar :: forall m. MonadGen m => m Char -genUnicodeChar = C.fromCharCode <$> chooseInt 0 65536 +genUnicodeChar = toEnumWithDefaults bottom top <$> chooseInt 0 65536 -- | Generates a character in the ASCII character set, excluding control codes. genAsciiChar :: forall m. MonadGen m => m Char -genAsciiChar = C.fromCharCode <$> chooseInt 32 127 +genAsciiChar = toEnumWithDefaults bottom top <$> chooseInt 32 127 -- | Generates a character in the ASCII character set. genAsciiChar' :: forall m. MonadGen m => m Char -genAsciiChar' = C.fromCharCode <$> chooseInt 0 127 +genAsciiChar' = toEnumWithDefaults bottom top <$> chooseInt 0 127 -- | Generates a character that is a numeric digit. genDigitChar :: forall m. MonadGen m => m Char -genDigitChar = C.fromCharCode <$> chooseInt 48 57 +genDigitChar = toEnumWithDefaults bottom top <$> chooseInt 48 57 -- | Generates a character from the basic latin alphabet. genAlpha :: forall m. MonadGen m => m Char @@ -28,8 +28,8 @@ genAlpha = oneOf (genAlphaLowercase :| [genAlphaUppercase]) -- | Generates a lowercase character from the basic latin alphabet. 
genAlphaLowercase :: forall m. MonadGen m => m Char -genAlphaLowercase = C.fromCharCode <$> chooseInt 97 122 +genAlphaLowercase = toEnumWithDefaults bottom top <$> chooseInt 97 122 -- | Generates an uppercase character from the basic latin alphabet. genAlphaUppercase :: forall m. MonadGen m => m Char -genAlphaUppercase = C.fromCharCode <$> chooseInt 65 90 +genAlphaUppercase = toEnumWithDefaults bottom top <$> chooseInt 65 90 diff --git a/src/Data/String.purs b/src/Data/String.purs index b6fbd0d..a382e6e 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -1,141 +1,9 @@ --- | Wraps the functions of Javascript's `String` object. --- | A String represents a sequence of characters. --- | For details of the underlying implementation, see [String Reference at MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String). module Data.String ( module Data.String.Pattern - , contains - , null - , localeCompare - , replace - , replaceAll - , stripPrefix - , stripSuffix - , split - , toLower - , toUpper - , trim - , joinWith + , module Data.String.Common + , module Data.String.CodeUnits ) where -import Prelude - -import Data.Maybe (Maybe(..), isJust) -import Data.String.CodeUnits as SCU import Data.String.Pattern (Pattern(..), Replacement(..)) - --- | Returns `true` if the given string is empty. --- | --- | ```purescript --- | null "" == true --- | null "Hi" == false --- | ``` -null :: String -> Boolean -null s = s == "" - --- | If the string starts with the given prefix, return the portion of the --- | string left after removing it, as a Just value. Otherwise, return Nothing. 
--- | --- | ```purescript --- | stripPrefix (Pattern "http:") "http://purescript.org" == Just "//purescript.org" --- | stripPrefix (Pattern "http:") "https://purescript.org" == Nothing --- | ``` -stripPrefix :: Pattern -> String -> Maybe String -stripPrefix prefix@(Pattern prefixS) str = - case SCU.indexOf prefix str of - Just 0 -> Just $ SCU.drop (SCU.length prefixS) str - _ -> Nothing - --- | If the string ends with the given suffix, return the portion of the --- | string left after removing it, as a `Just` value. Otherwise, return --- | `Nothing`. --- | --- | ```purescript --- | stripSuffix (Pattern ".exe") "psc.exe" == Just "psc" --- | stripSuffix (Pattern ".exe") "psc" == Nothing --- | ``` -stripSuffix :: Pattern -> String -> Maybe String -stripSuffix suffix@(Pattern suffixS) str = - case SCU.lastIndexOf suffix str of - Just x | x == SCU.length str - SCU.length suffixS -> Just $ SCU.take x str - _ -> Nothing - --- | Checks whether the pattern appears in the given string. --- | --- | ```purescript --- | contains (Pattern "needle") "haystack with needle" == true --- | contains (Pattern "needle") "haystack" == false --- | ``` -contains :: Pattern -> String -> Boolean -contains pat = isJust <<< SCU.indexOf pat - --- | Compare two strings in a locale-aware fashion. This is in contrast to --- | the `Ord` instance on `String` which treats strings as arrays of code --- | units: --- | --- | ```purescript --- | "ä" `localeCompare` "b" == LT --- | "ä" `compare` "b" == GT --- | ``` -localeCompare :: String -> String -> Ordering -localeCompare = _localeCompare LT EQ GT - -foreign import _localeCompare - :: Ordering - -> Ordering - -> Ordering - -> String - -> String - -> Ordering - --- | Replaces the first occurence of the pattern with the replacement string. 
--- | --- | ```purescript --- | replace (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b <= c" --- | ``` -foreign import replace :: Pattern -> Replacement -> String -> String - --- | Replaces all occurences of the pattern with the replacement string. --- | --- | ```purescript --- | replaceAll (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b ≤ c" --- | ``` -foreign import replaceAll :: Pattern -> Replacement -> String -> String - --- | Returns the substrings of the second string separated along occurences --- | of the first string. --- | --- | ```purescript --- | split (Pattern " ") "hello world" == ["hello", "world"] --- | ``` -foreign import split :: Pattern -> String -> Array String - --- | Returns the argument converted to lowercase. --- | --- | ```purescript --- | toLower "hElLo" == "hello" --- | ``` -foreign import toLower :: String -> String - --- | Returns the argument converted to uppercase. --- | --- | ```purescript --- | toUpper "Hello" == "HELLO" --- | ``` -foreign import toUpper :: String -> String - --- | Removes whitespace from the beginning and end of a string, including --- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) --- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). --- | --- | ```purescript --- | trim " Hello \n World\n\t " == "Hello \n World" --- | ``` -foreign import trim :: String -> String - --- | Joins the strings in the array together, inserting the first argument --- | as separator between them. 
--- | --- | ```purescript --- | joinWith ", " ["apple", "banana", "orange"] == "apple, banana, orange" --- | ``` -foreign import joinWith :: String -> Array String -> String +import Data.String.Common (joinWith, localeCompare, null, replace, replaceAll, split, toLower, toUpper, trim) +import Data.String.CodeUnits (contains, stripPrefix, stripSuffix) diff --git a/src/Data/String/CaseInsensitiveString.purs b/src/Data/String/CaseInsensitive.purs similarity index 100% rename from src/Data/String/CaseInsensitiveString.purs rename to src/Data/String/CaseInsensitive.purs diff --git a/src/Data/String/CodePoint.purs b/src/Data/String/CodePoint.purs deleted file mode 100644 index b8cec10..0000000 --- a/src/Data/String/CodePoint.purs +++ /dev/null @@ -1,446 +0,0 @@ --- | These functions allow PureScript strings to be treated as if they were --- | sequences of Unicode code points instead of their true underlying --- | implementation (sequences of UTF-16 code units). For nearly all uses of --- | strings, these functions should be preferred over the ones in `Data.String`. -module Data.String.CodePoints - ( CodePoint - , codePointAt - , fromInt - , toInt - , fromChar - , toCodePointArray - , fromCodePointArray - , countPrefix - , drop - , dropWhile - , indexOf - , indexOf' - , lastIndexOf - , lastIndexOf' - , length - , singleton - , splitAt - , take - , takeWhile - , uncons - ) where - -import Prelude - -import Data.Array as Array -import Data.Char (toCharCode) -import Data.Char as Char -import Data.Int (hexadecimal, toStringAs) -import Data.Maybe (Maybe(..)) -import Data.String as String -import Data.String.Unsafe as Unsafe -import Data.Tuple (Tuple(..)) -import Data.Unfoldable (unfoldr) - --- | CodePoint is an Int bounded between 0 and 0x10FFFF, corresponding to --- | Unicode code points. 
-newtype CodePoint = CodePoint Int - -derive instance eqCodePoint :: Eq CodePoint -derive instance ordCodePoint :: Ord CodePoint - -instance showCodePoint :: Show CodePoint where - show (CodePoint i) = "(CodePoint 0x" <> String.toUpper (toStringAs hexadecimal i) <> ")" - --- I would prefer that this smart constructor not need to exist and instead --- CodePoint just implements Enum, but the Enum module already depends on this --- one. To avoid the circular dependency, we just expose these two functions. --- | --- | ```purescript --- | >>> it = fromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A --- | Just (CodePoint 0x1D400) --- | --- | >>> map singleton it --- | Just "𝐀" --- | --- | >>> fromInt 0x110000 -- does not correspond to a Unicode code point --- | Nothing --- | ``` --- | -fromInt :: Int -> Maybe CodePoint -fromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) -fromInt n = Nothing - --- | --- | ```purescript --- | >>> toInt (fromChar 'B') --- | 66 --- | --- | >>> boldA = fromInt 0x1D400 --- | >>> boldA --- | Just (CodePoint 0x1D400) --- | >>> map toInt boldA --- | Just 119808 -- is the same as 0x1D400 --- | ``` --- | -toInt :: CodePoint -> Int -toInt (CodePoint n) = n - --- | Creates a CodePoint from a given Char. 
--- | --- | ```purescript --- | >>> fromChar 'B' --- | CodePoint 0x42 -- represents 'B' --- | ``` --- | -fromChar :: Char -> CodePoint -fromChar = toCharCode >>> CodePoint - -unsurrogate :: Int -> Int -> CodePoint -unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) - -isLead :: Int -> Boolean -isLead cu = 0xD800 <= cu && cu <= 0xDBFF - -isTrail :: Int -> Boolean -isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF - -fromCharCode :: Int -> String -fromCharCode = String.singleton <<< Char.fromCharCode - --- WARN: this function expects the String parameter to be non-empty -unsafeCodePointAt0 :: String -> CodePoint -unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback - -foreign import _unsafeCodePointAt0 - :: (String -> CodePoint) - -> String - -> CodePoint - -unsafeCodePointAt0Fallback :: String -> CodePoint -unsafeCodePointAt0Fallback s = - let cu0 = Unsafe.charCodeAt 0 s in - let cu1 = Unsafe.charCodeAt 1 s in - if isLead cu0 && isTrail cu1 - then unsurrogate cu0 cu1 - else CodePoint cu0 - - --- | Returns the first code point of the string after dropping the given number --- | of code points from the beginning, if there is such a code point. Operates --- | in constant space and in time linear to the given index. --- | --- | ```purescript --- | >>> codePointAt 1 "𝐀𝐀𝐀𝐀" --- | Just (CodePoint 0x1D400) -- represents "𝐀" --- | -- compare to Data.String: --- | >>> charAt 1 "𝐀𝐀𝐀𝐀" --- | Just '�' --- | ``` --- | -codePointAt :: Int -> String -> Maybe CodePoint -codePointAt n _ | n < 0 = Nothing -codePointAt 0 "" = Nothing -codePointAt 0 s = Just (unsafeCodePointAt0 s) -codePointAt n s = _codePointAt codePointAtFallback Just Nothing unsafeCodePointAt0 n s - -foreign import _codePointAt - :: (Int -> String -> Maybe CodePoint) - -> (forall a. a -> Maybe a) - -> (forall a. 
Maybe a) - -> (String -> CodePoint) - -> Int - -> String - -> Maybe CodePoint - -codePointAtFallback :: Int -> String -> Maybe CodePoint -codePointAtFallback n s = case uncons s of - Just { head, tail } -> if n == 0 then Just head else codePointAtFallback (n - 1) tail - _ -> Nothing - - --- | Returns the number of code points in the leading sequence of code points --- | which all match the given predicate. Operates in constant space and in --- | time linear to the length of the string. --- | --- | ```purescript --- | >>> countPrefix (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" --- | 2 --- | ``` --- | -countPrefix :: (CodePoint -> Boolean) -> String -> Int -countPrefix = _countPrefix countFallback unsafeCodePointAt0 - -foreign import _countPrefix - :: ((CodePoint -> Boolean) -> String -> Int) - -> (String -> CodePoint) - -> (CodePoint -> Boolean) - -> String - -> Int - -countFallback :: (CodePoint -> Boolean) -> String -> Int -countFallback p s = countTail p s 0 - -countTail :: (CodePoint -> Boolean) -> String -> Int -> Int -countTail p s accum = case uncons s of - Just { head, tail } -> if p head then countTail p tail (accum + 1) else accum - _ -> accum - - --- | Drops the given number of code points from the beginning of the string. If --- | the string does not have that many code points, returns the empty string. --- | Operates in constant space and in time linear to the given number. --- | --- | ```purescript --- | >>> drop 5 "𝐀𝐀 b c" --- | "c" --- | -- compared to Data.String: --- | >>> drop 5 "𝐀𝐀 b c" --- | "b c" -- because "𝐀" occupies 2 code units --- | ``` --- | -drop :: Int -> String -> String -drop n s = String.drop (String.length (take n s)) s - - --- | Drops the leading sequence of code points which all match the given --- | predicate from the string. Operates in constant space and in time linear --- | to the length of the string. 
--- | --- | ```purescript --- | >>> dropWhile (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" --- | " b c 𝐀" --- | ``` --- | -dropWhile :: (CodePoint -> Boolean) -> String -> String -dropWhile p s = drop (countPrefix p s) s - - --- | Creates a string from an array of code points. Operates in space and time --- | linear to the length of the array. --- | --- | ```purescript --- | >>> codePointArray = toCodePointArray "c 𝐀" --- | >>> codePointArray --- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400] --- | >>> fromCodePointArray codePointArray --- | "c 𝐀" --- | ``` --- | -fromCodePointArray :: Array CodePoint -> String -fromCodePointArray = _fromCodePointArray singletonFallback - -foreign import _fromCodePointArray - :: (CodePoint -> String) - -> Array CodePoint - -> String - --- | Returns the number of code points preceding the first match of the given --- | pattern in the string. Returns Nothing when no matches are found. --- | --- | ```purescript --- | >>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" --- | Just 2 --- | >>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" --- | Nothing --- | ``` --- | -indexOf :: String.Pattern -> String -> Maybe Int -indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s - - --- | Returns the number of code points preceding the first match of the given --- | pattern in the string. Pattern matches preceding the given index will be --- | ignored. Returns Nothing when no matches are found. --- | --- | ```purescript --- | >>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀" --- | Just 7 --- | >>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀" --- | Nothing --- | ``` --- | -indexOf' :: String.Pattern -> Int -> String -> Maybe Int -indexOf' p i s = - let s' = drop i s in - (\k -> i + length (String.take k s')) <$> String.indexOf p s' - - --- | Returns the number of code points preceding the last match of the given --- | pattern in the string. Returns Nothing when no matches are found. 
--- | --- | ```purescript --- | >>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀" --- | Just 7 --- | >>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀" --- | Nothing --- | ``` --- | -lastIndexOf :: String.Pattern -> String -> Maybe Int -lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s - - --- | Returns the number of code points preceding the first match of the given --- | pattern in the string. Pattern matches following the given index will be --- | ignored. Returns Nothing when no matches are found. --- | --- | ```purescript --- | >>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀" --- | Just 3 --- | >>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀" --- | Nothing --- | ``` --- | -lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int -lastIndexOf' p i s = - let i' = String.length (take i s) in - (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s - - --- | Returns the number of code points in the string. Operates in constant --- | space and in time linear to the length of the string. --- | --- | ```purescript --- | >>> length "b 𝐀𝐀 c 𝐀" --- | 8 --- | -- compare to Data.String: --- | >>> length "b 𝐀𝐀 c 𝐀" --- | 11 --- | ``` --- | -length :: String -> Int -length = Array.length <<< toCodePointArray - - --- | Creates a string containing just the given code point. Operates in --- | constant space and time. 
--- | --- | ```purescript --- | >>> map singleton (fromInt 0x1D400) --- | Just "𝐀" --- | ``` --- | -singleton :: CodePoint -> String -singleton = _singleton singletonFallback - -foreign import _singleton - :: (CodePoint -> String) - -> CodePoint - -> String - -singletonFallback :: CodePoint -> String -singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp -singletonFallback (CodePoint cp) = - let lead = ((cp - 0x10000) / 0x400) + 0xD800 in - let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in - fromCharCode lead <> fromCharCode trail - - --- | Splits a string into two substrings, where `before` contains the code --- | points up to (but not including) the given index, and `after` contains the --- | rest of the string, from that index on. --- | --- | ```purescript --- | >>> splitAt 3 "b 𝐀𝐀 c 𝐀" --- | Just { before: "b 𝐀", after: "𝐀 c 𝐀" } --- | ``` --- | --- | Thus the length of `(splitAt i s).before` will equal either `i` or --- | `length s`, if that is shorter. (Or if `i` is negative the length will be --- | 0.) --- | --- | In code: --- | ```purescript --- | length (splitAt i s).before == min (max i 0) (length s) --- | (splitAt i s).before <> (splitAt i s).after == s --- | splitAt i s == {before: take i s, after: drop i s} --- | ``` -splitAt :: Int -> String -> { before :: String, after :: String } -splitAt i s = - let before = take i s in - { before - -- inline drop i s to reuse the result of take i s - , after: String.drop (String.length before) s - } - --- | Returns a string containing the given number of code points from the --- | beginning of the given string. If the string does not have that many code --- | points, returns the empty string. Operates in constant space and in time --- | linear to the given number. 
--- | --- | ```purescript --- | >>> take 3 "b 𝐀𝐀 c 𝐀" --- | "b 𝐀" --- | -- compare to Data.String: --- | >>> take 3 "b 𝐀𝐀 c 𝐀" --- | "b �" --- | ``` --- | -take :: Int -> String -> String -take = _take takeFallback - -foreign import _take :: (Int -> String -> String) -> Int -> String -> String - -takeFallback :: Int -> String -> String -takeFallback n _ | n < 1 = "" -takeFallback n s = case uncons s of - Just { head, tail } -> singleton head <> takeFallback (n - 1) tail - _ -> s - - --- | Returns a string containing the leading sequence of code points which all --- | match the given predicate from the string. Operates in constant space and --- | in time linear to the length of the string. --- | --- | ```purescript --- | >>> takeWhile (\c -> toInt c == 0x1D400) "𝐀𝐀 b c 𝐀" --- | "𝐀𝐀" --- | ``` --- | -takeWhile :: (CodePoint -> Boolean) -> String -> String -takeWhile p s = take (countPrefix p s) s - - --- | Creates an array of code points from a string. Operates in space and time --- | linear to the length of the string. --- | --- | ```purescript --- | >>> codePointArray = toCodePointArray "b 𝐀𝐀" --- | >>> codePointArray --- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400] --- | >>> map singleton codePointArray --- | ["b", " ", "𝐀", "𝐀"] --- | ``` --- | -toCodePointArray :: String -> Array CodePoint -toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 - -foreign import _toCodePointArray - :: (String -> Array CodePoint) - -> (String -> CodePoint) - -> String - -> Array CodePoint - -toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr unconsButWithTuple s - -unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) -unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s - - --- | Returns a record with the first code point and the remaining code points --- | of the string. Returns Nothing if the string is empty. Operates in --- | constant space and time. 
--- | --- | ```purescript --- | >>> uncons "𝐀𝐀 c 𝐀" --- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" } --- | >>> uncons "" --- | Nothing --- | ``` --- | -uncons :: String -> Maybe { head :: CodePoint, tail :: String } -uncons s = case String.length s of - 0 -> Nothing - 1 -> Just { head: CodePoint (Unsafe.charCodeAt 0 s), tail: "" } - _ -> - let cu0 = Unsafe.charCodeAt 0 s in - let cu1 = Unsafe.charCodeAt 1 s in - if isLead cu0 && isTrail cu1 - then Just { head: unsurrogate cu0 cu1, tail: String.drop 2 s } - else Just { head: CodePoint cu0, tail: String.drop 1 s } diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index f6c512b..473731c 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -1,38 +1,42 @@ -- | These functions allow PureScript strings to be treated as if they were -- | sequences of Unicode code points instead of their true underlying -- | implementation (sequences of UTF-16 code units). For nearly all uses of --- | strings, these functions should be preferred over the ones in `Data.String`. +-- | strings, these functions should be preferred over the ones in +-- | `Data.String.CodeUnits`. 
module Data.String.CodePoints - ( CodePoint - , codePointAt - , codePointFromInt - , codePointToInt + ( module Data.String + , CodePoint , codePointFromChar - , toCodePointArray + , singleton , fromCodePointArray + , toCodePointArray + , codePointAt + , uncons + , length , countPrefix - , drop - , dropWhile , indexOf , indexOf' , lastIndexOf , lastIndexOf' - , length - , singleton - , splitAt , take + -- , takeRight , takeWhile - , uncons + , drop + -- , dropRight + , dropWhile + -- , slice + , splitAt ) where import Prelude import Data.Array as Array -import Data.Char (toCharCode) -import Data.Char as Char +import Data.Enum (class BoundedEnum, class Enum, Cardinality(..), defaultPred, defaultSucc, fromEnum, toEnum, toEnumWithDefaults) import Data.Int (hexadecimal, toStringAs) import Data.Maybe (Maybe(..)) import Data.String as String +import Data.String.CodeUnits as CU +import Data.String (Pattern(..), Replacement(..), contains, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toLower, toUpper, trim) import Data.String.Unsafe as Unsafe import Data.Tuple (Tuple(..)) import Data.Unfoldable (unfoldr) @@ -47,79 +51,98 @@ derive instance ordCodePoint :: Ord CodePoint instance showCodePoint :: Show CodePoint where show (CodePoint i) = "(CodePoint 0x" <> String.toUpper (toStringAs hexadecimal i) <> ")" --- I would prefer that this smart constructor not need to exist and instead --- CodePoint just implements Enum, but the Enum module already depends on this --- one. To avoid the circular dependency, we just expose these two functions. 
+instance boundedCodePoint :: Bounded CodePoint where + bottom = CodePoint 0 + top = CodePoint 0x10FFFF + +instance enumCodePoint :: Enum CodePoint where + succ = defaultSucc toEnum fromEnum + pred = defaultPred toEnum fromEnum + +instance boundedEnumCodePoint :: BoundedEnum CodePoint where + cardinality = Cardinality (0x10FFFF + 1) + fromEnum (CodePoint n) = n + toEnum n + | n >= 0 && n <= 0x10FFFF = Just (CodePoint n) + | otherwise = Nothing + +-- | Creates a CodePoint from a given Char. -- | -- | ```purescript --- | >>> it = codePointFromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A --- | Just (CodePoint 0x1D400) --- | --- | >>> map singleton it --- | Just "𝐀" --- | --- | >>> codePointFromInt 0x110000 -- does not correspond to a Unicode code point --- | Nothing +-- | >>> codePointFromChar 'B' +-- | CodePoint 0x42 -- represents 'B' -- | ``` -- | -codePointFromInt :: Int -> Maybe CodePoint -codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n) -codePointFromInt n = Nothing +codePointFromChar :: Char -> CodePoint +codePointFromChar = fromEnum >>> CodePoint +-- | Creates a string containing just the given code point. Operates in +-- | constant space and time. -- | -- | ```purescript --- | >>> codePointToInt (codePointFromChar 'B') --- | 66 --- | --- | >>> boldA = codePointFromInt 0x1D400 --- | >>> boldA --- | Just (CodePoint 0x1D400) --- | >>> map codePointToInt boldA --- | Just 119808 -- is the same as 0x1D400 +-- | >>> map singleton (toEnum 0x1D400) +-- | Just "𝐀" -- | ``` -- | -codePointToInt :: CodePoint -> Int -codePointToInt (CodePoint n) = n +singleton :: CodePoint -> String +singleton = _singleton singletonFallback --- | Creates a CodePoint from a given Char.
+foreign import _singleton + :: (CodePoint -> String) + -> CodePoint + -> String + +singletonFallback :: CodePoint -> String +singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp +singletonFallback (CodePoint cp) = + let lead = ((cp - 0x10000) / 0x400) + 0xD800 in + let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in + fromCharCode lead <> fromCharCode trail + +-- | Creates a string from an array of code points. Operates in space and time +-- | linear to the length of the array. -- | -- | ```purescript --- | >>> codePointFromChar 'B' --- | CodePoint 0x42 -- represents 'B' +-- | >>> codePointArray = toCodePointArray "c 𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400] +-- | >>> fromCodePointArray codePointArray +-- | "c 𝐀" -- | ``` -- | -codePointFromChar :: Char -> CodePoint -codePointFromChar = toCharCode >>> CodePoint - -unsurrogate :: Int -> Int -> CodePoint -unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) - -isLead :: Int -> Boolean -isLead cu = 0xD800 <= cu && cu <= 0xDBFF - -isTrail :: Int -> Boolean -isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF +fromCodePointArray :: Array CodePoint -> String +fromCodePointArray = _fromCodePointArray singletonFallback -fromCharCode :: Int -> String -fromCharCode = String.singleton <<< Char.fromCharCode +foreign import _fromCodePointArray + :: (CodePoint -> String) + -> Array CodePoint + -> String --- WARN: this function expects the String parameter to be non-empty -unsafeCodePointAt0 :: String -> CodePoint -unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback +-- | Creates an array of code points from a string. Operates in space and time +-- | linear to the length of the string. 
+-- | +-- | ```purescript +-- | >>> codePointArray = toCodePointArray "b 𝐀𝐀" +-- | >>> codePointArray +-- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400] +-- | >>> map singleton codePointArray +-- | ["b", " ", "𝐀", "𝐀"] +-- | ``` +-- | +toCodePointArray :: String -> Array CodePoint +toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 -foreign import _unsafeCodePointAt0 - :: (String -> CodePoint) +foreign import _toCodePointArray + :: (String -> Array CodePoint) + -> (String -> CodePoint) -> String - -> CodePoint + -> Array CodePoint -unsafeCodePointAt0Fallback :: String -> CodePoint -unsafeCodePointAt0Fallback s = - let cu0 = Unsafe.charCodeAt 0 s in - let cu1 = Unsafe.charCodeAt 1 s in - if isLead cu0 && isTrail cu1 - then unsurrogate cu0 cu1 - else CodePoint cu0 +toCodePointArrayFallback :: String -> Array CodePoint +toCodePointArrayFallback s = unfoldr unconsButWithTuple s +unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) +unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s -- | Returns the first code point of the string after dropping the given number -- | of code points from the beginning, if there is such a code point. Operates @@ -153,6 +176,43 @@ codePointAtFallback n s = case uncons s of Just { head, tail } -> if n == 0 then Just head else codePointAtFallback (n - 1) tail _ -> Nothing +-- | Returns a record with the first code point and the remaining code points +-- | of the string. Returns Nothing if the string is empty. Operates in +-- | constant space and time. 
+-- | +-- | ```purescript +-- | >>> uncons "𝐀𝐀 c 𝐀" +-- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" } +-- | >>> uncons "" +-- | Nothing +-- | ``` +-- | +uncons :: String -> Maybe { head :: CodePoint, tail :: String } +uncons s = case CU.length s of + 0 -> Nothing + 1 -> Just { head: CodePoint (fromEnum (Unsafe.charAt 0 s)), tail: "" } + _ -> + let + cu0 = fromEnum (Unsafe.charAt 0 s) + cu1 = fromEnum (Unsafe.charAt 1 s) + in + if isLead cu0 && isTrail cu1 + then Just { head: unsurrogate cu0 cu1, tail: CU.drop 2 s } + else Just { head: CodePoint cu0, tail: CU.drop 1 s } + +-- | Returns the number of code points in the string. Operates in constant +-- | space and in time linear to the length of the string. +-- | +-- | ```purescript +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 8 +-- | -- compare to Data.String: +-- | >>> length "b 𝐀𝐀 c 𝐀" +-- | 11 +-- | ``` +-- | +length :: String -> Int +length = Array.length <<< toCodePointArray -- | Returns the number of code points in the leading sequence of code points -- | which all match the given predicate. Operates in constant space and in @@ -181,55 +241,6 @@ countTail p s accum = case uncons s of Just { head, tail } -> if p head then countTail p tail (accum + 1) else accum _ -> accum - --- | Drops the given number of code points from the beginning of the string. If --- | the string does not have that many code points, returns the empty string. --- | Operates in constant space and in time linear to the given number. --- | --- | ```purescript --- | >>> drop 5 "𝐀𝐀 b c" --- | "c" --- | -- compared to Data.String: --- | >>> drop 5 "𝐀𝐀 b c" --- | "b c" -- because "𝐀" occupies 2 code units --- | ``` --- | -drop :: Int -> String -> String -drop n s = String.drop (String.length (take n s)) s - - --- | Drops the leading sequence of code points which all match the given --- | predicate from the string. Operates in constant space and in time linear --- | to the length of the string. 
--- | --- | ```purescript --- | >>> dropWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀" --- | " b c 𝐀" --- | ``` --- | -dropWhile :: (CodePoint -> Boolean) -> String -> String -dropWhile p s = drop (countPrefix p s) s - - --- | Creates a string from an array of code points. Operates in space and time --- | linear to the length of the array. --- | --- | ```purescript --- | >>> codePointArray = toCodePointArray "c 𝐀" --- | >>> codePointArray --- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400] --- | >>> fromCodePointArray codePointArray --- | "c 𝐀" --- | ``` --- | -fromCodePointArray :: Array CodePoint -> String -fromCodePointArray = _fromCodePointArray singletonFallback - -foreign import _fromCodePointArray - :: (CodePoint -> String) - -> Array CodePoint - -> String - -- | Returns the number of code points preceding the first match of the given -- | pattern in the string. Returns Nothing when no matches are found. -- | @@ -240,9 +251,8 @@ foreign import _fromCodePointArray -- | Nothing -- | ``` -- | -indexOf :: String.Pattern -> String -> Maybe Int -indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s - +indexOf :: Pattern -> String -> Maybe Int +indexOf p s = (\i -> length (CU.take i s)) <$> CU.indexOf p s -- | Returns the number of code points preceding the first match of the given -- | pattern in the string. Pattern matches preceding the given index will be @@ -255,11 +265,10 @@ indexOf p s = (\i -> length (String.take i s)) <$> String.indexOf p s -- | Nothing -- | ``` -- | -indexOf' :: String.Pattern -> Int -> String -> Maybe Int +indexOf' :: Pattern -> Int -> String -> Maybe Int indexOf' p i s = let s' = drop i s in - (\k -> i + length (String.take k s')) <$> String.indexOf p s' - + (\k -> i + length (CU.take k s')) <$> CU.indexOf p s' -- | Returns the number of code points preceding the last match of the given -- | pattern in the string. Returns Nothing when no matches are found. 
@@ -271,9 +280,8 @@ -- | Nothing -- | ``` -- | -lastIndexOf :: String.Pattern -> String -> Maybe Int -lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s - +lastIndexOf :: Pattern -> String -> Maybe Int +lastIndexOf p s = (\i -> length (CU.take i s)) <$> CU.lastIndexOf p s -- | Returns the number of code points preceding the first match of the given -- | pattern in the string. Pattern matches following the given index will be @@ -286,50 +294,73 @@ lastIndexOf p s = (\i -> length (String.take i s)) <$> String.lastIndexOf p s -- | Nothing -- | ``` -- | -lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int +lastIndexOf' :: Pattern -> Int -> String -> Maybe Int lastIndexOf' p i s = - let i' = String.length (take i s) in - (\k -> length (String.take k s)) <$> String.lastIndexOf' p i' s + let i' = CU.length (take i s) in + (\k -> length (CU.take k s)) <$> CU.lastIndexOf' p i' s - --- | Returns the number of code points in the string. Operates in constant --- | space and in time linear to the length of the string. +-- | Returns a string containing the given number of code points from the +-- | beginning of the given string. If the string does not have that many code +-- | points, returns the whole string. Operates in constant space and in time +-- | linear to the given number. -- | -- | ```purescript --- | >>> length "b 𝐀𝐀 c 𝐀" --- | 8 +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b 𝐀" -- | -- compare to Data.String: --- | >>> length "b 𝐀𝐀 c 𝐀" --- | 11 +-- | >>> take 3 "b 𝐀𝐀 c 𝐀" +-- | "b �" -- | ``` -- | -length :: String -> Int -length = Array.length <<< toCodePointArray +take :: Int -> String -> String +take = _take takeFallback +foreign import _take :: (Int -> String -> String) -> Int -> String -> String --- | Creates a string containing just the given code point. Operates in --- | constant space and time.
+takeFallback :: Int -> String -> String +takeFallback n _ | n < 1 = "" +takeFallback n s = case uncons s of + Just { head, tail } -> singleton head <> takeFallback (n - 1) tail + _ -> s + +-- | Returns a string containing the leading sequence of code points which all +-- | match the given predicate from the string. Operates in constant space and +-- | in time linear to the length of the string. -- | -- | ```purescript --- | >>> map singleton (codePointFromInt 0x1D400) --- | Just "𝐀" +-- | >>> takeWhile (\c -> fromEnum c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | "𝐀𝐀" -- | ``` -- | -singleton :: CodePoint -> String -singleton = _singleton singletonFallback - -foreign import _singleton - :: (CodePoint -> String) - -> CodePoint - -> String +takeWhile :: (CodePoint -> Boolean) -> String -> String +takeWhile p s = take (countPrefix p s) s -singletonFallback :: CodePoint -> String -singletonFallback (CodePoint cp) | cp <= 0xFFFF = fromCharCode cp -singletonFallback (CodePoint cp) = - let lead = ((cp - 0x10000) / 0x400) + 0xD800 in - let trail = (cp - 0x10000) `mod` 0x400 + 0xDC00 in - fromCharCode lead <> fromCharCode trail +-- | Drops the given number of code points from the beginning of the string. If +-- | the string does not have that many code points, returns the empty string. +-- | Operates in constant space and in time linear to the given number. +-- | +-- | ```purescript +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "c" +-- | -- compared to Data.String: +-- | >>> drop 5 "𝐀𝐀 b c" +-- | "b c" -- because "𝐀" occupies 2 code units +-- | ``` +-- | +drop :: Int -> String -> String +drop n s = CU.drop (CU.length (take n s)) s +-- | Drops the leading sequence of code points which all match the given +-- | predicate from the string. Operates in constant space and in time linear +-- | to the length of the string.
+-- | +-- | ```purescript +-- | >>> dropWhile (\c -> fromEnum c == 0x1D400) "𝐀𝐀 b c 𝐀" +-- | " b c 𝐀" +-- | ``` +-- | +dropWhile :: (CodePoint -> Boolean) -> String -> String +dropWhile p s = drop (countPrefix p s) s -- | Splits a string into two substrings, where `before` contains the code -- | points up to (but not including) the given index, and `after` contains the @@ -355,92 +386,36 @@ splitAt i s = let before = take i s in { before -- inline drop i s to reuse the result of take i s - , after: String.drop (String.length before) s + , after: CU.drop (CU.length before) s } --- | Returns a string containing the given number of code points from the --- | beginning of the given string. If the string does not have that many code --- | points, returns the empty string. Operates in constant space and in time --- | linear to the given number. --- | --- | ```purescript --- | >>> take 3 "b 𝐀𝐀 c 𝐀" --- | "b 𝐀" --- | -- compare to Data.String: --- | >>> take 3 "b 𝐀𝐀 c 𝐀" --- | "b �" --- | ``` --- | -take :: Int -> String -> String -take = _take takeFallback - -foreign import _take :: (Int -> String -> String) -> Int -> String -> String - -takeFallback :: Int -> String -> String -takeFallback n _ | n < 1 = "" -takeFallback n s = case uncons s of - Just { head, tail } -> singleton head <> takeFallback (n - 1) tail - _ -> s +unsurrogate :: Int -> Int -> CodePoint +unsurrogate lead trail = CodePoint ((lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000) +isLead :: Int -> Boolean +isLead cu = 0xD800 <= cu && cu <= 0xDBFF --- | Returns a string containing the leading sequence of code points which all --- | match the given predicate from the string. Operates in constant space and --- | in time linear to the length of the string.
--- | --- | ```purescript --- | >>> takeWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀" --- | "𝐀𝐀" --- | ``` --- | -takeWhile :: (CodePoint -> Boolean) -> String -> String -takeWhile p s = take (countPrefix p s) s +isTrail :: Int -> Boolean +isTrail cu = 0xDC00 <= cu && cu <= 0xDFFF +fromCharCode :: Int -> String +fromCharCode = CU.singleton <<< toEnumWithDefaults bottom top --- | Creates an array of code points from a string. Operates in space and time --- | linear to the length of the string. --- | --- | ```purescript --- | >>> codePointArray = toCodePointArray "b 𝐀𝐀" --- | >>> codePointArray --- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400] --- | >>> map singleton codePointArray --- | ["b", " ", "𝐀", "𝐀"] --- | ``` --- | -toCodePointArray :: String -> Array CodePoint -toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0 +-- WARN: this function expects the String parameter to be non-empty +unsafeCodePointAt0 :: String -> CodePoint +unsafeCodePointAt0 = _unsafeCodePointAt0 unsafeCodePointAt0Fallback -foreign import _toCodePointArray - :: (String -> Array CodePoint) - -> (String -> CodePoint) +foreign import _unsafeCodePointAt0 + :: (String -> CodePoint) -> String - -> Array CodePoint - -toCodePointArrayFallback :: String -> Array CodePoint -toCodePointArrayFallback s = unfoldr unconsButWithTuple s - -unconsButWithTuple :: String -> Maybe (Tuple CodePoint String) -unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s - + -> CodePoint --- | Returns a record with the first code point and the remaining code points --- | of the string. Returns Nothing if the string is empty. Operates in --- | constant space and time. 
--- | --- | ```purescript --- | >>> uncons "𝐀𝐀 c 𝐀" --- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" } --- | >>> uncons "" --- | Nothing --- | ``` --- | -uncons :: String -> Maybe { head :: CodePoint, tail :: String } -uncons s = case String.length s of - 0 -> Nothing - 1 -> Just { head: CodePoint (Unsafe.charCodeAt 0 s), tail: "" } - _ -> - let cu0 = Unsafe.charCodeAt 0 s in - let cu1 = Unsafe.charCodeAt 1 s in +unsafeCodePointAt0Fallback :: String -> CodePoint +unsafeCodePointAt0Fallback s = + let + cu0 = fromEnum (Unsafe.charAt 0 s) + cu1 = fromEnum (Unsafe.charAt 1 s) + in if isLead cu0 && isTrail cu1 - then Just { head: unsurrogate cu0 cu1, tail: String.drop 2 s } - else Just { head: CodePoint cu0, tail: String.drop 1 s } + then unsurrogate cu0 cu1 + else CodePoint cu0 diff --git a/src/Data/String/CodeUnits.js b/src/Data/String/CodeUnits.js index 425bb47..6590986 100644 --- a/src/Data/String/CodeUnits.js +++ b/src/Data/String/CodeUnits.js @@ -4,6 +4,10 @@ exports.fromCharArray = function (a) { return a.join(""); }; +exports.toCharArray = function (s) { + return s.split(""); +}; + exports.singleton = function (c) { return c; }; @@ -26,10 +30,6 @@ exports._toChar = function (just) { }; }; -exports.toCharArray = function (s) { - return s.split(""); -}; - exports.length = function (s) { return s.length; }; diff --git a/src/Data/String/CodeUnits.purs b/src/Data/String/CodeUnits.purs index 3c5ac9c..b67a639 100644 --- a/src/Data/String/CodeUnits.purs +++ b/src/Data/String/CodeUnits.purs @@ -1,12 +1,14 @@ --- | Wraps the functions of Javascript's `String` object. --- | A String represents a sequence of characters. --- | For details of the underlying implementation, see [String Reference at MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String). 
module Data.String.CodeUnits - ( singleton + ( module Data.String.Pattern + , module Data.String.Common + , stripPrefix + , stripSuffix + , contains + , singleton , fromCharArray + , toCharArray , charAt , toChar - , toCharArray , uncons , length , countPrefix @@ -26,10 +28,57 @@ module Data.String.CodeUnits import Prelude -import Data.Maybe (Maybe(..)) -import Data.String.Pattern (Pattern) +import Data.Maybe (Maybe(..), isJust) +import Data.String.Common (joinWith, localeCompare, null, replace, replaceAll, split, toLower, toUpper, trim) +import Data.String.Pattern (Pattern(..), Replacement(..)) import Data.String.Unsafe as U +------------------------------------------------------------------------------- +-- `stripPrefix`, `stripSuffix`, and `contains` are CodeUnit/CodePoint agnostic +-- as they are based on patterns rather than lengths/indices, but they need to +-- be defined in here to avoid a circular module dependency +------------------------------------------------------------------------------- + +-- | If the string starts with the given prefix, return the portion of the +-- | string left after removing it, as a Just value. Otherwise, return Nothing. +-- | +-- | ```purescript +-- | stripPrefix (Pattern "http:") "http://purescript.org" == Just "//purescript.org" +-- | stripPrefix (Pattern "http:") "https://purescript.org" == Nothing +-- | ``` +stripPrefix :: Pattern -> String -> Maybe String +stripPrefix prefix@(Pattern prefixS) str = + case indexOf prefix str of + Just 0 -> Just $ drop (length prefixS) str + _ -> Nothing + +-- | If the string ends with the given suffix, return the portion of the +-- | string left after removing it, as a `Just` value. Otherwise, return +-- | `Nothing`. 
+-- | +-- | ```purescript +-- | stripSuffix (Pattern ".exe") "psc.exe" == Just "psc" +-- | stripSuffix (Pattern ".exe") "psc" == Nothing +-- | ``` +stripSuffix :: Pattern -> String -> Maybe String +stripSuffix suffix@(Pattern suffixS) str = + case lastIndexOf suffix str of + Just x | x == length str - length suffixS -> Just $ take x str + _ -> Nothing + +-- | Checks whether the pattern appears in the given string. +-- | +-- | ```purescript +-- | contains (Pattern "needle") "haystack with needle" == true +-- | contains (Pattern "needle") "haystack" == false +-- | ``` +contains :: Pattern -> String -> Boolean +contains pat = isJust <<< indexOf pat + +------------------------------------------------------------------------------- +-- all functions past this point are CodeUnit specific +------------------------------------------------------------------------------- + -- | Returns a string of length `1` containing the given character. -- | -- | ```purescript @@ -45,6 +94,13 @@ foreign import singleton :: Char -> String -- | ``` foreign import fromCharArray :: Array Char -> String +-- | Converts the string into an array of characters. +-- | +-- | ```purescript +-- | toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n'] +-- | ``` +foreign import toCharArray :: String -> Array Char + -- | Returns the character at the given index, if the index is within bounds. -- | -- | ```purescript @@ -78,13 +134,6 @@ foreign import _toChar -> String -> Maybe Char --- | Converts the string into an array of characters. --- | --- | ```purescript --- | toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n'] --- | ``` -foreign import toCharArray :: String -> Array Char - -- | Returns the first character and the rest of the string, -- | if the string is not empty. 
-- | diff --git a/src/Data/String.js b/src/Data/String/Common.js similarity index 100% rename from src/Data/String.js rename to src/Data/String/Common.js diff --git a/src/Data/String/Common.purs b/src/Data/String/Common.purs new file mode 100644 index 0000000..9e3132e --- /dev/null +++ b/src/Data/String/Common.purs @@ -0,0 +1,96 @@ +module Data.String.Common + ( null + , localeCompare + , replace + , replaceAll + , split + , toLower + , toUpper + , trim + , joinWith + ) where + +import Prelude + +import Data.String.Pattern (Pattern, Replacement) + +-- | Returns `true` if the given string is empty. +-- | +-- | ```purescript +-- | null "" == true +-- | null "Hi" == false +-- | ``` +null :: String -> Boolean +null s = s == "" + +-- | Compare two strings in a locale-aware fashion. This is in contrast to +-- | the `Ord` instance on `String` which treats strings as arrays of code +-- | units: +-- | +-- | ```purescript +-- | "ä" `localeCompare` "b" == LT +-- | "ä" `compare` "b" == GT +-- | ``` +localeCompare :: String -> String -> Ordering +localeCompare = _localeCompare LT EQ GT + +foreign import _localeCompare + :: Ordering + -> Ordering + -> Ordering + -> String + -> String + -> Ordering + +-- | Replaces the first occurrence of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replace (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b <= c" +-- | ``` +foreign import replace :: Pattern -> Replacement -> String -> String + +-- | Replaces all occurrences of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replaceAll (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b ≤ c" +-- | ``` +foreign import replaceAll :: Pattern -> Replacement -> String -> String + +-- | Returns the substrings of the second string separated along occurrences +-- | of the first string.
+-- | +-- | ```purescript +-- | split (Pattern " ") "hello world" == ["hello", "world"] +-- | ``` +foreign import split :: Pattern -> String -> Array String + +-- | Returns the argument converted to lowercase. +-- | +-- | ```purescript +-- | toLower "hElLo" == "hello" +-- | ``` +foreign import toLower :: String -> String + +-- | Returns the argument converted to uppercase. +-- | +-- | ```purescript +-- | toUpper "Hello" == "HELLO" +-- | ``` +foreign import toUpper :: String -> String + +-- | Removes whitespace from the beginning and end of a string, including +-- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) +-- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). +-- | +-- | ```purescript +-- | trim " Hello \n World\n\t " == "Hello \n World" +-- | ``` +foreign import trim :: String -> String + +-- | Joins the strings in the array together, inserting the first argument +-- | as separator between them. +-- | +-- | ```purescript +-- | joinWith ", " ["apple", "banana", "orange"] == "apple, banana, orange" +-- | ``` +foreign import joinWith :: String -> Array String -> String diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs new file mode 100644 index 0000000..02988a1 --- /dev/null +++ b/src/Data/String/NonEmpty.purs @@ -0,0 +1,241 @@ +module Data.String.NonEmpty + ( NonEmptyString + , class MakeNonEmpty, nes + , NonEmptyReplacement(..) 
+ , fromString + , unsafeFromString + , toString + , appendString + , prependString + , contains + , localeCompare + , replace + , replaceAll + , stripPrefix + , stripSuffix + , toLower + , toUpper + , trim + , joinWith + , join1With + , joinWith1 + , module Data.String.Pattern + ) where + +import Prelude + +import Data.Foldable (class Foldable) +import Data.Foldable as F +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.String as String +import Data.String.Pattern (Pattern(..)) +import Data.Symbol (class IsSymbol, SProxy, reflectSymbol) +import Prim.TypeError as TE +import Unsafe.Coerce (unsafeCoerce) + +-- | A string that is known not to be empty. +newtype NonEmptyString = NonEmptyString String + +derive newtype instance eqNonEmptyString ∷ Eq NonEmptyString +derive newtype instance ordNonEmptyString ∷ Ord NonEmptyString +derive newtype instance semigroupNonEmptyString ∷ Semigroup NonEmptyString + +instance showNonEmptyString :: Show NonEmptyString where + show (NonEmptyString s) = "(NonEmptyString.unsafeFromString " <> show s <> ")" + +-- | A helper class for defining non-empty string values at compile time. +-- | +-- | ``` purescript +-- | something :: NonEmptyString +-- | something = nes (SProxy :: SProxy "something") +-- | ``` +class MakeNonEmpty (s :: Symbol) where + nes :: SProxy s -> NonEmptyString + +instance makeNonEmptyBad :: TE.Fail (TE.Text "Cannot create an NonEmptyString from an empty Symbol") => MakeNonEmpty "" where + nes _ = NonEmptyString "" + +else instance nonEmptyNonEmpty :: IsSymbol s => MakeNonEmpty s where + nes p = NonEmptyString (reflectSymbol p) + +-- | A newtype used in cases to specify a non-empty replacement for a pattern. 
+newtype NonEmptyReplacement = NonEmptyReplacement NonEmptyString + +derive newtype instance eqNonEmptyReplacement :: Eq NonEmptyReplacement +derive newtype instance ordNonEmptyReplacement :: Ord NonEmptyReplacement +derive newtype instance semigroupNonEmptyReplacement ∷ Semigroup NonEmptyReplacement + +instance showNonEmptyReplacement :: Show NonEmptyReplacement where + show (NonEmptyReplacement s) = "(NonEmptyReplacement " <> show s <> ")" + +-- | Creates a `NonEmptyString` from a `String`, returning `Nothing` if the +-- | input is empty. +-- | +-- | ```purescript +-- | fromString "" = Nothing +-- | fromString "hello" = Just (NES.unsafeFromString "hello") +-- | ``` +fromString :: String -> Maybe NonEmptyString +fromString = case _ of + "" -> Nothing + s -> Just (NonEmptyString s) + +-- | A partial version of `fromString`. +unsafeFromString :: Partial => String -> NonEmptyString +unsafeFromString = fromJust <<< fromString + +-- | Converts a `NonEmptyString` back into a standard `String`. +toString :: NonEmptyString -> String +toString (NonEmptyString s) = s + +-- | Appends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. +-- | +-- | ```purescript +-- | appendString (NonEmptyString "Hello") " world" == NonEmptyString "Hello world" +-- | appendString (NonEmptyString "Hello") "" == NonEmptyString "Hello" +-- | ``` +appendString :: NonEmptyString -> String -> NonEmptyString +appendString (NonEmptyString s1) s2 = NonEmptyString (s1 <> s2) + +-- | Prepends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. 
+-- | +-- | ```purescript +-- | prependString "be" (NonEmptyString "fore") == NonEmptyString "before" +-- | prependString "" (NonEmptyString "fore") == NonEmptyString "fore" +-- | ``` +prependString :: String -> NonEmptyString -> NonEmptyString +prependString s1 (NonEmptyString s2) = NonEmptyString (s1 <> s2) + +-- | If the string starts with the given prefix, return the portion of the +-- | string left after removing it. If the prefix does not match or there is no +-- | remainder, the result will be `Nothing`. +-- | +-- | ```purescript +-- | stripPrefix (Pattern "http:") (NonEmptyString "http://purescript.org") == Just (NonEmptyString "//purescript.org") +-- | stripPrefix (Pattern "http:") (NonEmptyString "https://purescript.org") == Nothing +-- | stripPrefix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripPrefix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripPrefix pat = fromString <=< liftS (String.stripPrefix pat) + +-- | If the string ends with the given suffix, return the portion of the +-- | string left after removing it. If the suffix does not match or there is no +-- | remainder, the result will be `Nothing`. +-- | +-- | ```purescript +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs.exe") == Just (NonEmptyString "purs") +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs") == Nothing +-- | stripSuffix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripSuffix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripSuffix pat = fromString <=< liftS (String.stripSuffix pat) + +-- | Checks whether the pattern appears in the given string. +-- | +-- | ```purescript +-- | contains (Pattern "needle") (NonEmptyString "haystack with needle") == true +-- | contains (Pattern "needle") (NonEmptyString "haystack") == false +-- | ``` +contains :: Pattern -> NonEmptyString -> Boolean +contains = liftS <<< String.contains + +-- | Compare two strings in a locale-aware fashion. 
This is in contrast to +-- | the `Ord` instance on `String` which treats strings as arrays of code +-- | units: +-- | +-- | ```purescript +-- | NonEmptyString "ä" `localeCompare` NonEmptyString "b" == LT +-- | NonEmptyString "ä" `compare` NonEmptyString "b" == GT +-- | ``` +localeCompare :: NonEmptyString -> NonEmptyString -> Ordering +localeCompare (NonEmptyString a) (NonEmptyString b) = String.localeCompare a b + +-- | Replaces the first occurrence of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replace (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b <= c" +-- | ``` +replace :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replace pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replace pat (String.Replacement rep) s) + +-- | Replaces all occurrences of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replaceAll (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b ≤ c" +-- | ``` +replaceAll :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replaceAll pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replaceAll pat (String.Replacement rep) s) + +-- | Returns the argument converted to lowercase. +-- | +-- | ```purescript +-- | toLower (NonEmptyString "hElLo") == NonEmptyString "hello" +-- | ``` +toLower :: NonEmptyString -> NonEmptyString +toLower (NonEmptyString s) = NonEmptyString (String.toLower s) + +-- | Returns the argument converted to uppercase.
+-- | +-- | ```purescript +-- | toUpper (NonEmptyString "Hello") == NonEmptyString "HELLO" +-- | ``` +toUpper :: NonEmptyString -> NonEmptyString +toUpper (NonEmptyString s) = NonEmptyString (String.toUpper s) + +-- | Removes whitespace from the beginning and end of a string, including +-- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) +-- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). +-- | If the string is entirely made up of whitespace the result will be Nothing. +-- | +-- | ```purescript +-- | trim (NonEmptyString " Hello \n World\n\t ") == Just (NonEmptyString "Hello \n World") +-- | trim (NonEmptyString " \n") == Nothing +-- | ``` +trim :: NonEmptyString -> Maybe NonEmptyString +trim (NonEmptyString s) = fromString (String.trim s) + +-- | Joins the strings in a container together as a new string, inserting the +-- | first argument as separator between them. The result is not guaranteed to +-- | be non-empty. +-- | +-- | ```purescript +-- | joinWith ", " [NonEmptyString "apple", NonEmptyString "banana"] == "apple, banana" +-- | joinWith ", " [] == "" +-- | ``` +joinWith :: forall f. Foldable f => String -> f NonEmptyString -> String +joinWith splice = F.intercalate splice <<< coe + where + coe :: f NonEmptyString -> f String + coe = unsafeCoerce + +-- | Joins non-empty strings in a non-empty container together as a new +-- | non-empty string, inserting a possibly empty string as separator between +-- | them. The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | join1With ", " [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "apple, banana" +-- | join1With "" [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "applebanana" +-- | ``` +join1With :: forall f. 
Foldable1 f => String -> f NonEmptyString -> NonEmptyString +join1With splice = NonEmptyString <<< joinWith splice + +-- | Joins possibly empty strings in a non-empty container together as a new +-- | non-empty string, inserting a non-empty string as a separator between them. +-- | The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | joinWith1 (NonEmptyString ", ") ["apple", "banana"] == NonEmptyString "apple, banana" +-- | joinWith1 (NonEmptyString "/") ["a", "b", "", "c", ""] == NonEmptyString "a/b//c/" +-- | ``` +joinWith1 :: forall f. Foldable1 f => NonEmptyString -> f String -> NonEmptyString +joinWith1 (NonEmptyString splice) = NonEmptyString <<< F.intercalate splice + +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS f (NonEmptyString s) = f s diff --git a/src/Data/String/NonEmpty/CaseInsensitive.purs b/src/Data/String/NonEmpty/CaseInsensitive.purs new file mode 100644 index 0000000..d1c1719 --- /dev/null +++ b/src/Data/String/NonEmpty/CaseInsensitive.purs @@ -0,0 +1,22 @@ +module Data.String.NonEmpty.CaseInsensitive where + +import Prelude + +import Data.Newtype (class Newtype) +import Data.String.NonEmpty (NonEmptyString, toLower) + +-- | A newtype for case insensitive string comparisons and ordering. 
+newtype CaseInsensitiveNonEmptyString = CaseInsensitiveNonEmptyString NonEmptyString + +instance eqCaseInsensitiveNonEmptyString :: Eq CaseInsensitiveNonEmptyString where + eq (CaseInsensitiveNonEmptyString s1) (CaseInsensitiveNonEmptyString s2) = + toLower s1 == toLower s2 + +instance ordCaseInsensitiveNonEmptyString :: Ord CaseInsensitiveNonEmptyString where + compare (CaseInsensitiveNonEmptyString s1) (CaseInsensitiveNonEmptyString s2) = + compare (toLower s1) (toLower s2) + +instance showCaseInsensitiveNonEmptyString :: Show CaseInsensitiveNonEmptyString where + show (CaseInsensitiveNonEmptyString s) = "(CaseInsensitiveNonEmptyString " <> show s <> ")" + +derive instance newtypeCaseInsensitiveNonEmptyString :: Newtype CaseInsensitiveNonEmptyString _ diff --git a/src/Data/String/NonEmpty/CodePoints.purs b/src/Data/String/NonEmpty/CodePoints.purs new file mode 100644 index 0000000..5ec1254 --- /dev/null +++ b/src/Data/String/NonEmpty/CodePoints.purs @@ -0,0 +1,136 @@ +module Data.String.NonEmpty.CodePoints + ( module Data.String.NonEmpty + , fromCodePointArray + , fromNonEmptyCodePointArray + , singleton + , cons + , snoc + , fromFoldable1 + , toCodePointArray + , toNonEmptyCodePointArray + , codePointAt + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , uncons + , length + , take + -- takeRight + , takeWhile + , drop + -- dropRight + , dropWhile + , countPrefix + , splitAt + ) where + +import Prelude + +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NEA +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.Semigroup.Foldable as F1 +import Data.String.CodePoints (CodePoint) +import Data.String.CodePoints as CP +import Data.String.NonEmpty (class MakeNonEmpty, NonEmptyReplacement(..), NonEmptyString, Pattern(..), appendString, contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, toLower, 
toString, toUpper, trim, unsafeFromString) +import Partial.Unsafe (unsafePartial) +import Unsafe.Coerce (unsafeCoerce) + +toNonEmptyString :: String -> NonEmptyString +toNonEmptyString = unsafeCoerce + +fromNonEmptyString :: NonEmptyString -> String +fromNonEmptyString = unsafeCoerce + +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS = unsafeCoerce + +fromCodePointArray :: Array CodePoint -> Maybe NonEmptyString +fromCodePointArray = case _ of + [] -> Nothing + cs -> Just (toNonEmptyString (CP.fromCodePointArray cs)) + +fromNonEmptyCodePointArray :: NonEmptyArray CodePoint -> NonEmptyString +fromNonEmptyCodePointArray = unsafePartial fromJust <<< fromCodePointArray <<< NEA.toArray + +singleton :: CodePoint -> NonEmptyString +singleton = toNonEmptyString <<< CP.singleton + +cons :: CodePoint -> String -> NonEmptyString +cons c s = toNonEmptyString (CP.singleton c <> s) + +snoc :: CodePoint -> String -> NonEmptyString +snoc c s = toNonEmptyString (s <> CP.singleton c) + +fromFoldable1 :: forall f. 
Foldable1 f => f CodePoint -> NonEmptyString +fromFoldable1 = F1.foldMap1 singleton + +toCodePointArray :: NonEmptyString -> Array CodePoint +toCodePointArray = CP.toCodePointArray <<< fromNonEmptyString + +toNonEmptyCodePointArray :: NonEmptyString -> NonEmptyArray CodePoint +toNonEmptyCodePointArray = unsafePartial fromJust <<< NEA.fromArray <<< toCodePointArray + +codePointAt :: Int -> NonEmptyString -> Maybe CodePoint +codePointAt = liftS <<< CP.codePointAt + +indexOf :: Pattern -> NonEmptyString -> Maybe Int +indexOf = liftS <<< CP.indexOf + +indexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +indexOf' pat = liftS <<< CP.indexOf' pat + +lastIndexOf :: Pattern -> NonEmptyString -> Maybe Int +lastIndexOf = liftS <<< CP.lastIndexOf + +lastIndexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +lastIndexOf' pat = liftS <<< CP.lastIndexOf' pat + +uncons :: NonEmptyString -> { head :: CodePoint, tail :: Maybe NonEmptyString } +uncons nes = + let + s = fromNonEmptyString nes + in + { head: unsafePartial fromJust (CP.codePointAt 0 s) + , tail: fromString (CP.drop 1 s) + } + +length :: NonEmptyString -> Int +length = CP.length <<< fromNonEmptyString + +take :: Int -> NonEmptyString -> Maybe NonEmptyString +take i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CP.take i s)) + +takeWhile :: (CodePoint -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +takeWhile f = fromString <<< liftS (CP.takeWhile f) + +drop :: Int -> NonEmptyString -> Maybe NonEmptyString +drop i nes = + let + s = fromNonEmptyString nes + in + if i >= CP.length s + then Nothing + else Just (toNonEmptyString (CP.drop i s)) + +dropWhile :: (CodePoint -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +dropWhile f = fromString <<< liftS (CP.dropWhile f) + +countPrefix :: (CodePoint -> Boolean) -> NonEmptyString -> Int +countPrefix = liftS <<< CP.countPrefix + +splitAt + :: Int + -> NonEmptyString + -> { before :: Maybe 
NonEmptyString, after :: Maybe NonEmptyString } +splitAt i nes = + case CP.splitAt i (fromNonEmptyString nes) of + { before, after } -> { before: fromString before, after: fromString after } diff --git a/src/Data/String/NonEmpty/CodeUnits.purs b/src/Data/String/NonEmpty/CodeUnits.purs new file mode 100644 index 0000000..0a826f0 --- /dev/null +++ b/src/Data/String/NonEmpty/CodeUnits.purs @@ -0,0 +1,298 @@ +module Data.String.NonEmpty.CodeUnits + ( module Data.String.NonEmpty + , fromCharArray + , fromNonEmptyCharArray + , singleton + , cons + , snoc + , fromFoldable1 + , toCharArray + , toNonEmptyCharArray + , charAt + , toChar + , indexOf + , indexOf' + , lastIndexOf + , lastIndexOf' + , uncons + , length + , take + , takeRight + , takeWhile + , drop + , dropRight + , dropWhile + , countPrefix + , splitAt + ) where + +import Prelude + +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NEA +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.Semigroup.Foldable as F1 +import Data.String.CodeUnits as CU +import Data.String.Unsafe as U +import Data.String.NonEmpty (class MakeNonEmpty, NonEmptyReplacement(..), NonEmptyString, Pattern(..), appendString, contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, toLower, toString, toUpper, trim, unsafeFromString) +import Partial.Unsafe (unsafePartial) +import Unsafe.Coerce (unsafeCoerce) + +toNonEmptyString :: String -> NonEmptyString +toNonEmptyString = unsafeCoerce + +fromNonEmptyString :: NonEmptyString -> String +fromNonEmptyString = unsafeCoerce + +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS = unsafeCoerce + +-- | Creates a `NonEmptyString` from a character array `String`, returning +-- | `Nothing` if the input is empty. 
+-- | +-- | ```purescript +-- | fromCharArray [] = Nothing +-- | fromCharArray ['a', 'b', 'c'] = Just (NonEmptyString "abc") +-- | ``` +fromCharArray :: Array Char -> Maybe NonEmptyString +fromCharArray = case _ of + [] -> Nothing + cs -> Just (toNonEmptyString (CU.fromCharArray cs)) + +fromNonEmptyCharArray :: NonEmptyArray Char -> NonEmptyString +fromNonEmptyCharArray = unsafePartial fromJust <<< fromCharArray <<< NEA.toArray + +-- | Creates a `NonEmptyString` from a character. +singleton :: Char -> NonEmptyString +singleton = toNonEmptyString <<< CU.singleton + +-- | Creates a `NonEmptyString` from a string by prepending a character. +-- | +-- | ```purescript +-- | cons 'a' "bc" = NonEmptyString "abc" +-- | cons 'a' "" = NonEmptyString "a" +-- | ``` +cons :: Char -> String -> NonEmptyString +cons c s = toNonEmptyString (CU.singleton c <> s) + +-- | Creates a `NonEmptyString` from a string by appending a character. +-- | +-- | ```purescript +-- | snoc 'c' "ab" = NonEmptyString "abc" +-- | snoc 'a' "" = NonEmptyString "a" +-- | ``` +snoc :: Char -> String -> NonEmptyString +snoc c s = toNonEmptyString (s <> CU.singleton c) + +-- | Creates a `NonEmptyString` from a `Foldable1` container carrying +-- | characters. +fromFoldable1 :: forall f. Foldable1 f => f Char -> NonEmptyString +fromFoldable1 = F1.fold1 <<< coe + where + coe ∷ f Char -> f NonEmptyString + coe = unsafeCoerce + +-- | Converts the `NonEmptyString` into an array of characters. +-- | +-- | ```purescript +-- | toCharArray (NonEmptyString "Hello☺\n") == ['H','e','l','l','o','☺','\n'] +-- | ``` +toCharArray :: NonEmptyString -> Array Char +toCharArray = CU.toCharArray <<< fromNonEmptyString + +-- | Converts the `NonEmptyString` into a non-empty array of characters. +toNonEmptyCharArray :: NonEmptyString -> NonEmptyArray Char +toNonEmptyCharArray = unsafePartial fromJust <<< NEA.fromArray <<< toCharArray + +-- | Returns the character at the given index, if the index is within bounds. 
+-- | +-- | ```purescript +-- | charAt 2 (NonEmptyString "Hello") == Just 'l' +-- | charAt 10 (NonEmptyString "Hello") == Nothing +-- | ``` +charAt :: Int -> NonEmptyString -> Maybe Char +charAt = liftS <<< CU.charAt + +-- | Converts the `NonEmptyString` to a character, if the length of the string +-- | is exactly `1`. +-- | +-- | ```purescript +-- | toChar "H" == Just 'H' +-- | toChar "Hi" == Nothing +-- | ``` +toChar :: NonEmptyString -> Maybe Char +toChar = CU.toChar <<< fromNonEmptyString + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | indexOf (Pattern "c") (NonEmptyString "abcdc") == Just 2 +-- | indexOf (Pattern "c") (NonEmptyString "aaa") == Nothing +-- | ``` +indexOf :: Pattern -> NonEmptyString -> Maybe Int +indexOf = liftS <<< CU.indexOf + +-- | Returns the index of the first occurrence of the pattern in the +-- | given string, starting at the specified index. Returns `Nothing` if there is +-- | no match. +-- | +-- | ```purescript +-- | indexOf' (Pattern "a") 2 (NonEmptyString "ababa") == Just 2 +-- | indexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 4 +-- | ``` +indexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +indexOf' pat = liftS <<< CU.indexOf' pat + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string. Returns `Nothing` if there is no match. +-- | +-- | ```purescript +-- | lastIndexOf (Pattern "c") (NonEmptyString "abcdc") == Just 4 +-- | lastIndexOf (Pattern "c") (NonEmptyString "aaa") == Nothing +-- | ``` +lastIndexOf :: Pattern -> NonEmptyString -> Maybe Int +lastIndexOf = liftS <<< CU.lastIndexOf + +-- | Returns the index of the last occurrence of the pattern in the +-- | given string, starting at the specified index +-- | and searching backwards towards the beginning of the string. +-- | Returns `Nothing` if there is no match. 
+-- | +-- | ```purescript +-- | lastIndexOf' (Pattern "a") 1 (NonEmptyString "ababa") == Just 0 +-- | lastIndexOf' (Pattern "a") 3 (NonEmptyString "ababa") == Just 2 +-- | lastIndexOf' (Pattern "a") 4 (NonEmptyString "ababa") == Just 4 +-- | ``` +lastIndexOf' :: Pattern -> Int -> NonEmptyString -> Maybe Int +lastIndexOf' pat = liftS <<< CU.lastIndexOf' pat + +-- | Returns the first character and the rest of the string. +-- | +-- | ```purescript +-- | uncons "a" == { head: 'a', tail: Nothing } +-- | uncons "Hello World" == { head: 'H', tail: Just (NonEmptyString "ello World") } +-- | ``` +uncons :: NonEmptyString -> { head :: Char, tail :: Maybe NonEmptyString } +uncons nes = + let + s = fromNonEmptyString nes + in + { head: U.charAt 0 s + , tail: fromString (CU.drop 1 s) + } + +-- | Returns the number of characters the string is composed of. +-- | +-- | ```purescript +-- | length (NonEmptyString "Hello World") == 11 +-- | ``` +length :: NonEmptyString -> Int +length = CU.length <<< fromNonEmptyString + +-- | Returns the first `n` characters of the string. Returns `Nothing` if `n` is +-- | less than 1. +-- | +-- | ```purescript +-- | take 5 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") +-- | take 0 (NonEmptyString "Hello World") == Nothing +-- | ``` +take :: Int -> NonEmptyString -> Maybe NonEmptyString +take i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CU.take i s)) + +-- | Returns the last `n` characters of the string. Returns `Nothing` if `n` is +-- | less than 1. 
+-- | +-- | ```purescript +-- | takeRight 5 (NonEmptyString "Hello World") == Just (NonEmptyString "World") +-- | takeRight 0 (NonEmptyString "Hello World") == Nothing +-- | ``` +takeRight :: Int -> NonEmptyString -> Maybe NonEmptyString +takeRight i nes = + let + s = fromNonEmptyString nes + in + if i < 1 + then Nothing + else Just (toNonEmptyString (CU.takeRight i s)) + +-- | Returns the longest prefix of characters that satisfy the predicate. +-- | `Nothing` is returned if there is no matching prefix. +-- | +-- | ```purescript +-- | takeWhile (_ /= ':') (NonEmptyString "http://purescript.org") == Just (NonEmptyString "http") +-- | takeWhile (_ == 'a') (NonEmptyString "xyz") == Nothing +-- | ``` +takeWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +takeWhile f = fromString <<< liftS (CU.takeWhile f) + +-- | Returns the string without the first `n` characters. Returns `Nothing` if +-- | `n` is greater than or equal to the length of the string. +-- | +-- | ```purescript +-- | drop 6 (NonEmptyString "Hello World") == Just (NonEmptyString "World") +-- | drop 20 (NonEmptyString "Hello World") == Nothing +-- | ``` +drop :: Int -> NonEmptyString -> Maybe NonEmptyString +drop i nes = + let + s = fromNonEmptyString nes + in + if i >= CU.length s + then Nothing + else Just (toNonEmptyString (CU.drop i s)) + +-- | Returns the string without the last `n` characters. Returns `Nothing` if +-- | `n` is greater than or equal to the length of the string. +-- | +-- | ```purescript +-- | dropRight 6 (NonEmptyString "Hello World") == Just (NonEmptyString "Hello") +-- | dropRight 20 (NonEmptyString "Hello World") == Nothing +-- | ``` +dropRight :: Int -> NonEmptyString -> Maybe NonEmptyString +dropRight i nes = + let + s = fromNonEmptyString nes + in + if i >= CU.length s + then Nothing + else Just (toNonEmptyString (CU.dropRight i s)) + +-- | Returns the suffix remaining after `takeWhile`. 
+-- | +-- | ```purescript +-- | dropWhile (_ /= '.') (NonEmptyString "Test.purs") == Just (NonEmptyString ".purs") +-- | ``` +dropWhile :: (Char -> Boolean) -> NonEmptyString -> Maybe NonEmptyString +dropWhile f = fromString <<< liftS (CU.dropWhile f) + +-- | Returns the number of contiguous characters at the beginning of the string +-- | for which the predicate holds. +-- | +-- | ```purescript +-- | countPrefix (_ /= 'o') (NonEmptyString "Hello World") == 4 +-- | ``` +countPrefix :: (Char -> Boolean) -> NonEmptyString -> Int +countPrefix = liftS <<< CU.countPrefix + +-- | Returns the substrings of a split at the given index. Each side of the +-- | result is `Nothing` when the corresponding substring is empty. +-- | +-- | ```purescript +-- | splitAt 2 (NonEmptyString "Hello World") == { before: Just (NonEmptyString "He"), after: Just (NonEmptyString "llo World") } +-- | splitAt 10 (NonEmptyString "Hi") == { before: Just (NonEmptyString "Hi"), after: Nothing } +-- | ``` +splitAt + :: Int + -> NonEmptyString + -> { before :: Maybe NonEmptyString, after :: Maybe NonEmptyString } +splitAt i nes = + case CU.splitAt i (fromNonEmptyString nes) of + { before, after } -> { before: fromString before, after: fromString after } diff --git a/src/Data/String/Unsafe.js b/src/Data/String/Unsafe.js index b54568f..d7a17ca 100644 --- a/src/Data/String/Unsafe.js +++ b/src/Data/String/Unsafe.js @@ -1,12 +1,5 @@ "use strict"; -exports.charCodeAt = function (i) { - return function (s) { - if (i >= 0 && i < s.length) return s.charCodeAt(i); - throw new Error("Data.String.Unsafe.charCodeAt: Invalid index."); - }; -}; - exports.charAt = function (i) { return function (s) { if (i >= 0 && i < s.length) return s.charAt(i); diff --git a/src/Data/String/Unsafe.purs b/src/Data/String/Unsafe.purs index e9874f4..75f5037 100644 --- a/src/Data/String/Unsafe.purs +++ b/src/Data/String/Unsafe.purs @@ -2,14 +2,8 @@ module Data.String.Unsafe ( char , charAt - , charCodeAt ) where --- | Returns the numeric Unicode value of the character at the given index. 
--- | --- | **Unsafe:** throws runtime exception if the index is out of bounds. -foreign import charCodeAt :: Int -> String -> Int - -- | Returns the character at the given index. -- | -- | **Unsafe:** throws runtime exception if the index is out of bounds. diff --git a/test/Test/Data/Char.purs b/test/Test/Data/Char.purs index 869a2ca..f6d80ea 100644 --- a/test/Test/Data/Char.purs +++ b/test/Test/Data/Char.purs @@ -1,28 +1,31 @@ module Test.Data.Char (testChar) where -import Prelude (Unit, (==), ($), discard) +import Prelude +import Data.Char as C import Effect (Effect) import Effect.Console (log) - -import Data.Char - -import Test.Assert (assert) +import Test.Assert (assertEqual) testChar :: Effect Unit testChar = do - log "toCharCode" - assert $ toCharCode 'a' == 97 - assert $ toCharCode '\n' == 10 - - log "fromCharCode" - assert $ fromCharCode 97 == 'a' - assert $ fromCharCode 10 == '\n' log "toLower" - assert $ toLower 'A' == 'a' - assert $ toLower 'a' == 'a' + assertEqual + { actual: C.toLower 'A' + , expected: 'a' + } + assertEqual + { actual: C.toLower 'a' + , expected: 'a' + } log "toUpper" - assert $ toUpper 'a' == 'A' - assert $ toUpper 'A' == 'A' + assertEqual + { actual: C.toUpper 'a' + , expected: 'A' + } + assertEqual + { actual: C.toUpper 'A' + , expected: 'A' + } diff --git a/test/Test/Data/String.purs b/test/Test/Data/String.purs index 855ee5c..c496798 100644 --- a/test/Test/Data/String.purs +++ b/test/Test/Data/String.purs @@ -1,218 +1,141 @@ module Test.Data.String (testString) where -import Prelude (Unit, Ordering(..), (==), ($), discard, negate, not, (/=), (&&)) +import Prelude +import Data.Maybe (Maybe(..)) +import Data.String as S +import Data.String.Pattern (Pattern(..), Replacement(..)) import Effect (Effect) import Effect.Console (log) - -import Data.Maybe (Maybe(..), isNothing) -import Data.String - -import Test.Assert (assert) +import Test.Assert (assert, assertEqual) testString :: Effect Unit testString = do - log "charAt" - assert $ 
charAt 0 "" == Nothing - assert $ charAt 0 "a" == Just 'a' - assert $ charAt 1 "a" == Nothing - assert $ charAt 0 "ab" == Just 'a' - assert $ charAt 1 "ab" == Just 'b' - assert $ charAt 2 "ab" == Nothing - - log "singleton" - assert $ singleton 'a' == "a" - - log "charCodeAt" - assert $ charCodeAt 0 "" == Nothing - assert $ charCodeAt 0 "a" == Just 97 - assert $ charCodeAt 1 "a" == Nothing - assert $ charCodeAt 0 "ab" == Just 97 - assert $ charCodeAt 1 "ab" == Just 98 - assert $ charCodeAt 2 "ab" == Nothing - - log "toChar" - assert $ toChar "" == Nothing - assert $ toChar "a" == Just 'a' - assert $ toChar "ab" == Nothing log "null" - assert $ null "" - assert $ not (null "a") - - log "uncons" - assert $ isNothing (uncons "") - assert $ case uncons "a" of - Nothing -> false - Just m -> m.head == 'a' && m.tail == "" - assert $ case uncons "ab" of - Nothing -> false - Just m -> m.head == 'a' && m.tail == "b" - - log "takeWhile" - assert $ takeWhile (\c -> true) "abc" == "abc" - assert $ takeWhile (\c -> false) "abc" == "" - assert $ takeWhile (\c -> c /= 'b') "aabbcc" == "aa" - - log "dropWhile" - assert $ dropWhile (\c -> true) "abc" == "" - assert $ dropWhile (\c -> false) "abc" == "abc" - assert $ dropWhile (\c -> c /= 'b') "aabbcc" == "bbcc" + assert $ S.null "" + assert $ not (S.null "a") log "stripPrefix" - assert $ stripPrefix (Pattern "") "" == Just "" - assert $ stripPrefix (Pattern "") "abc" == Just "abc" - assert $ stripPrefix (Pattern "a") "abc" == Just "bc" - assert $ stripPrefix (Pattern "!") "abc" == Nothing - assert $ stripPrefix (Pattern "!") "" == Nothing - - log "fromCharArray" - assert $ fromCharArray [] == "" - assert $ fromCharArray ['a', 'b'] == "ab" + assertEqual + { actual: S.stripPrefix (Pattern "") "" + , expected: Just "" + } + assertEqual + { actual: S.stripPrefix (Pattern "") "abc" + , expected: Just "abc" + } + assertEqual + { actual: S.stripPrefix (Pattern "a") "abc" + , expected: Just "bc" + } + assertEqual + { actual: S.stripPrefix 
(Pattern "!") "abc" + , expected: Nothing + } + assertEqual + { actual: S.stripPrefix (Pattern "!") "" + , expected: Nothing + } log "contains" - assert $ contains (Pattern "") "" - assert $ contains (Pattern "") "abcd" - assert $ contains (Pattern "bc") "abcd" - assert $ not (contains (Pattern "cb") "abcd") - - log "indexOf" - assert $ indexOf (Pattern "") "" == Just 0 - assert $ indexOf (Pattern "") "abcd" == Just 0 - assert $ indexOf (Pattern "bc") "abcd" == Just 1 - assert $ indexOf (Pattern "cb") "abcd" == Nothing - - log "indexOf'" - assert $ indexOf' (Pattern "") 0 "" == Just 0 - assert $ indexOf' (Pattern "") (-1) "ab" == Nothing - assert $ indexOf' (Pattern "") 0 "ab" == Just 0 - assert $ indexOf' (Pattern "") 1 "ab" == Just 1 - assert $ indexOf' (Pattern "") 2 "ab" == Just 2 - assert $ indexOf' (Pattern "") 3 "ab" == Nothing - assert $ indexOf' (Pattern "bc") 0 "abcd" == Just 1 - assert $ indexOf' (Pattern "bc") 1 "abcd" == Just 1 - assert $ indexOf' (Pattern "bc") 2 "abcd" == Nothing - assert $ indexOf' (Pattern "cb") 0 "abcd" == Nothing - - log "lastIndexOf" - assert $ lastIndexOf (Pattern "") "" == Just 0 - assert $ lastIndexOf (Pattern "") "abcd" == Just 4 - assert $ lastIndexOf (Pattern "bc") "abcd" == Just 1 - assert $ lastIndexOf (Pattern "cb") "abcd" == Nothing - - log "lastIndexOf'" - assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 - assert $ lastIndexOf' (Pattern "") (-1) "ab" == Nothing - assert $ lastIndexOf' (Pattern "") 0 "ab" == Just 0 - assert $ lastIndexOf' (Pattern "") 1 "ab" == Just 1 - assert $ lastIndexOf' (Pattern "") 2 "ab" == Just 2 - assert $ lastIndexOf' (Pattern "") 3 "ab" == Nothing - assert $ lastIndexOf' (Pattern "bc") 0 "abcd" == Nothing - assert $ lastIndexOf' (Pattern "bc") 1 "abcd" == Just 1 - assert $ lastIndexOf' (Pattern "bc") 2 "abcd" == Just 1 - assert $ lastIndexOf' (Pattern "cb") 0 "abcd" == Nothing - - log "length" - assert $ length "" == 0 - assert $ length "a" == 1 - assert $ length "ab" == 2 + assert $ 
S.contains (Pattern "") "" + assert $ S.contains (Pattern "") "abcd" + assert $ S.contains (Pattern "bc") "abcd" + assert $ not S.contains (Pattern "cb") "abcd" log "localeCompare" - assert $ localeCompare "" "" == EQ - assert $ localeCompare "a" "a" == EQ - assert $ localeCompare "a" "b" == LT - assert $ localeCompare "b" "a" == GT + assertEqual + { actual: S.localeCompare "" "" + , expected: EQ + } + assertEqual + { actual: S.localeCompare "a" "a" + , expected: EQ + } + assertEqual + { actual: S.localeCompare "a" "b" + , expected: LT + } + assertEqual + { actual: S.localeCompare "b" "a" + , expected: GT + } log "replace" - assert $ replace (Pattern "b") (Replacement "") "abc" == "ac" - assert $ replace (Pattern "b") (Replacement "!") "abc" == "a!c" - assert $ replace (Pattern "d") (Replacement "!") "abc" == "abc" + assertEqual + { actual: S.replace (Pattern "b") (Replacement "") "abc" + , expected: "ac" + } + assertEqual + { actual: S.replace (Pattern "b") (Replacement "!") "abc" + , expected: "a!c" + } + assertEqual + { actual: S.replace (Pattern "d") (Replacement "!") "abc" + , expected: "abc" + } log "replaceAll" - assert $ replaceAll (Pattern "b") (Replacement "") "abbbbbc" == "ac" - assert $ replaceAll (Pattern "[b]") (Replacement "!") "a[b]c" == "a!c" - - log "take" - assert $ take 0 "ab" == "" - assert $ take 1 "ab" == "a" - assert $ take 2 "ab" == "ab" - assert $ take 3 "ab" == "ab" - assert $ take (-1) "ab" == "" - - log "takeRight" - assert $ takeRight 0 "ab" == "" - assert $ takeRight 1 "ab" == "b" - assert $ takeRight 2 "ab" == "ab" - assert $ takeRight 3 "ab" == "ab" - assert $ takeRight (-1) "ab" == "" - - log "drop" - assert $ drop 0 "ab" == "ab" - assert $ drop 1 "ab" == "b" - assert $ drop 2 "ab" == "" - assert $ drop 3 "ab" == "" - assert $ drop (-1) "ab" == "ab" - - log "dropRight" - assert $ dropRight 0 "ab" == "ab" - assert $ dropRight 1 "ab" == "a" - assert $ dropRight 2 "ab" == "" - assert $ dropRight 3 "ab" == "" - assert $ dropRight (-1) 
"ab" == "ab" - - log "countPrefix" - assert $ countPrefix (_ == 'a') "" == 0 - assert $ countPrefix (_ == 'a') "ab" == 1 - assert $ countPrefix (_ == 'a') "aaab" == 3 - assert $ countPrefix (_ == 'a') "abaa" == 1 + assertEqual + { actual: S.replaceAll (Pattern "b") (Replacement "") "abbbbbc" + , expected: "ac" + } + assertEqual + { actual: S.replaceAll (Pattern "[b]") (Replacement "!") "a[b]c" + , expected: "a!c" + } log "split" - assert $ split (Pattern "") "" == [] - assert $ split (Pattern "") "a" == ["a"] - assert $ split (Pattern "") "ab" == ["a", "b"] - assert $ split (Pattern "b") "aabcc" == ["aa", "cc"] - assert $ split (Pattern "d") "abc" == ["abc"] - - log "splitAt" - let testSplitAt i str r = - assert $ case splitAt i str of - { before, after } -> - r.before == before && r.after == after - - testSplitAt 1 "" {before: "", after: ""} - testSplitAt 0 "a" {before: "", after: "a"} - testSplitAt 1 "a" {before: "a", after: ""} - testSplitAt 1 "ab" {before: "a", after: "b"} - testSplitAt 3 "aabcc" {before: "aab", after: "cc"} - testSplitAt (-1) "abc" {before: "", after: "abc"} - testSplitAt 10 "Hi" {before: "Hi", after: ""} - - log "toCharArray" - assert $ toCharArray "" == [] - assert $ toCharArray "a" == ['a'] - assert $ toCharArray "ab" == ['a', 'b'] + assertEqual + { actual: S.split (Pattern "") "" + , expected: [] + } + assertEqual + { actual: S.split (Pattern "") "a" + , expected: ["a"] + } + assertEqual + { actual: S.split (Pattern "") "ab" + , expected: ["a", "b"] + } + assertEqual + { actual: S.split (Pattern "b") "aabcc" + , expected: ["aa", "cc"] + } + assertEqual + { actual: S.split (Pattern "d") "abc" + , expected: ["abc"] + } log "toLower" - assert $ toLower "bAtMaN" == "batman" + assertEqual + { actual: S.toLower "bAtMaN" + , expected: "batman" + } log "toUpper" - assert $ toUpper "bAtMaN" == "BATMAN" + assertEqual + { actual: S.toUpper "bAtMaN" + , expected: "BATMAN" + } log "trim" - assert $ trim " abc " == "abc" + assertEqual + { actual: S.trim 
" abc " + , expected: "abc" + } log "joinWith" - assert $ joinWith "" [] == "" - assert $ joinWith "" ["a", "b"] == "ab" - assert $ joinWith "--" ["a", "b", "c"] == "a--b--c" - - log "slice" - assert $ slice 0 0 "purescript" == Just "" - assert $ slice 0 1 "purescript" == Just "p" - assert $ slice 3 6 "purescript" == Just "esc" - assert $ slice (-4) (-1) "purescript" == Just "rip" - assert $ slice (-4) 3 "purescript" == Nothing -- b' > e' - assert $ slice 1000 3 "purescript" == Nothing -- b' > e' (subsumes b > l) - assert $ slice 2 (-15) "purescript" == Nothing -- e' < 0 - assert $ slice (-15) 9 "purescript" == Nothing -- b' < 0 - assert $ slice 3 1000 "purescript" == Nothing -- e > l + assertEqual + { actual: S.joinWith "" [] + , expected: "" + } + assertEqual + { actual: S.joinWith "" ["a", "b"] + , expected: "ab" + } + assertEqual + { actual: S.joinWith "--" ["a", "b", "c"] + , expected: "a--b--c" + } diff --git a/test/Test/Data/String/CaseInsensitive.purs b/test/Test/Data/String/CaseInsensitive.purs index ec7d4bf..a263732 100644 --- a/test/Test/Data/String/CaseInsensitive.purs +++ b/test/Test/Data/String/CaseInsensitive.purs @@ -1,18 +1,22 @@ module Test.Data.String.CaseInsensitive (testCaseInsensitiveString) where -import Prelude (Unit, (==), ($), discard, compare, Ordering(..)) +import Prelude +import Data.String.CaseInsensitive (CaseInsensitiveString(..)) import Effect (Effect) import Effect.Console (log) - -import Data.String.CaseInsensitive - -import Test.Assert (assert) +import Test.Assert (assertEqual) testCaseInsensitiveString :: Effect Unit testCaseInsensitiveString = do log "equality" - assert $ CaseInsensitiveString "aB" == CaseInsensitiveString "AB" + assertEqual + { actual: CaseInsensitiveString "aB" + , expected: CaseInsensitiveString "AB" + } log "comparison" - assert $ compare (CaseInsensitiveString "qwerty") (CaseInsensitiveString "QWERTY") == EQ + assertEqual + { actual: compare (CaseInsensitiveString "qwerty") (CaseInsensitiveString "QWERTY") 
+ , expected: EQ + } diff --git a/test/Test/Data/String/CodePoints.purs b/test/Test/Data/String/CodePoints.purs index 29c282d..a3e49b9 100644 --- a/test/Test/Data/String/CodePoints.purs +++ b/test/Test/Data/String/CodePoints.purs @@ -2,209 +2,644 @@ module Test.Data.String.CodePoints (testStringCodePoints) where import Prelude +import Data.Enum (fromEnum, toEnum) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.CodePoints as SCP +import Data.String.Pattern (Pattern(..)) import Effect (Effect) import Effect.Console (log) - -import Data.Char (fromCharCode) -import Data.Maybe (Maybe(..), isNothing, maybe) -import Data.String.CodePoints - -import Test.Assert (assert) +import Partial.Unsafe (unsafePartial) +import Test.Assert (assertEqual) str :: String str = "a\xDC00\xD800\xD800\x16805\x16A06\&z" testStringCodePoints :: Effect Unit testStringCodePoints = do - log "show" - assert $ map show (codePointAt 0 str) == Just "(CodePoint 0x61)" - assert $ map show (codePointAt 1 str) == Just "(CodePoint 0xDC00)" - assert $ map show (codePointAt 2 str) == Just "(CodePoint 0xD800)" - assert $ map show (codePointAt 3 str) == Just "(CodePoint 0xD800)" - assert $ map show (codePointAt 4 str) == Just "(CodePoint 0x16805)" - assert $ map show (codePointAt 5 str) == Just "(CodePoint 0x16A06)" - assert $ map show (codePointAt 6 str) == Just "(CodePoint 0x7A)" - log "codePointAt" - assert $ codePointAt (-1) str == Nothing - assert $ codePointAt 0 str == (codePointFromInt 0x61) - assert $ codePointAt 1 str == (codePointFromInt 0xDC00) - assert $ codePointAt 2 str == (codePointFromInt 0xD800) - assert $ codePointAt 3 str == (codePointFromInt 0xD800) - assert $ codePointAt 4 str == (codePointFromInt 0x16805) - assert $ codePointAt 5 str == (codePointFromInt 0x16A06) - assert $ codePointAt 6 str == (codePointFromInt 0x7A) - assert $ codePointAt 7 str == Nothing + log "show" + assertEqual + { actual: map show (SCP.codePointAt 0 str) + , expected: Just "(CodePoint 0x61)" + } + 
assertEqual + { actual: map show (SCP.codePointAt 1 str) + , expected: Just "(CodePoint 0xDC00)" + } + assertEqual + { actual: map show (SCP.codePointAt 2 str) + , expected: Just "(CodePoint 0xD800)" + } + assertEqual + { actual: map show (SCP.codePointAt 3 str) + , expected: Just "(CodePoint 0xD800)" + } + assertEqual + { actual: map show (SCP.codePointAt 4 str) + , expected: Just "(CodePoint 0x16805)" + } + assertEqual + { actual: map show (SCP.codePointAt 5 str) + , expected: Just "(CodePoint 0x16A06)" + } + assertEqual + { actual: map show (SCP.codePointAt 6 str) + , expected: Just "(CodePoint 0x7A)" + } log "codePointFromChar" - assert $ Just (codePointFromChar 'A') == (codePointFromInt 65) - assert $ Just (codePointFromChar $ fromCharCode 0) == codePointFromInt 0 - assert $ Just (codePointFromChar $ fromCharCode 0xFFFF) == codePointFromInt 0xFFFF + assertEqual + { actual: Just (SCP.codePointFromChar 'A') + , expected: (toEnum 65) + } + assertEqual + { actual: (SCP.codePointFromChar <$> toEnum 0) + , expected: toEnum 0 + } + assertEqual + { actual: (SCP.codePointFromChar <$> toEnum 0xFFFF) + , expected: toEnum 0xFFFF + } - log "countPrefix" - assert $ countPrefix (\_ -> true) "" == 0 - assert $ countPrefix (\_ -> false) str == 0 - assert $ countPrefix (\_ -> true) str == 7 - assert $ countPrefix (\x -> codePointToInt x < 0xFFFF) str == 4 - assert $ countPrefix (\x -> codePointToInt x < 0xDC00) str == 1 + log "singleton" + assertEqual + { actual: (SCP.singleton <$> toEnum 0x30) + , expected: Just "0" + } + assertEqual + { actual: (SCP.singleton <$> toEnum 0x16805) + , expected: Just "\x16805" + } - log "drop" - assert $ drop (-1) str == str - assert $ drop 0 str == str - assert $ drop 1 str == "\xDC00\xD800\xD800\x16805\x16A06\&z" - assert $ drop 2 str == "\xD800\xD800\x16805\x16A06\&z" - assert $ drop 3 str == "\xD800\x16805\x16A06\&z" - assert $ drop 4 str == "\x16805\x16A06\&z" - assert $ drop 5 str == "\x16A06\&z" - assert $ drop 6 str == "z" - assert $ 
drop 7 str == "" - assert $ drop 8 str == "" + log "codePointAt" + assertEqual + { actual: SCP.codePointAt (-1) str + , expected: Nothing + } + assertEqual + { actual: SCP.codePointAt 0 str + , expected: (toEnum 0x61) + } + assertEqual + { actual: SCP.codePointAt 1 str + , expected: (toEnum 0xDC00) + } + assertEqual + { actual: SCP.codePointAt 2 str + , expected: (toEnum 0xD800) + } + assertEqual + { actual: SCP.codePointAt 3 str + , expected: (toEnum 0xD800) + } + assertEqual + { actual: SCP.codePointAt 4 str + , expected: (toEnum 0x16805) + } + assertEqual + { actual: SCP.codePointAt 5 str + , expected: (toEnum 0x16A06) + } + assertEqual + { actual: SCP.codePointAt 6 str + , expected: (toEnum 0x7A) + } + assertEqual + { actual: SCP.codePointAt 7 str + , expected: Nothing + } - log "dropWhile" - assert $ dropWhile (\_ -> true) str == "" - assert $ dropWhile (\_ -> false) str == str - assert $ dropWhile (\c -> codePointToInt c < 0xFFFF) str == "\x16805\x16A06\&z" - assert $ dropWhile (\c -> codePointToInt c < 0xDC00) str == "\xDC00\xD800\xD800\x16805\x16A06\&z" + log "uncons" + assertEqual + { actual: SCP.uncons str + , expected: Just {head: cp 0x61, tail: "\xDC00\xD800\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 1 str) + , expected: Just {head: cp 0xDC00, tail: "\xD800\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 2 str) + , expected: Just {head: cp 0xD800, tail: "\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 3 str) + , expected: Just {head: cp 0xD800, tail: "\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 4 str) + , expected: Just {head: cp 0x16805, tail: "\x16A06\&z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 5 str) + , expected: Just {head: cp 0x16A06, tail: "z"} + } + assertEqual + { actual: SCP.uncons (SCP.drop 6 str) + , expected: Just {head: cp 0x7A, tail: ""} + } + assertEqual + { actual: SCP.uncons "" + , expected: Nothing + } + + 
log "length" + assertEqual + { actual: SCP.length "" + , expected: 0 + } + assertEqual + { actual: SCP.length "a" + , expected: 1 + } + assertEqual + { actual: SCP.length "ab" + , expected: 2 + } + assertEqual + { actual: SCP.length str + , expected: 7 + } + + log "countPrefix" + assertEqual + { actual: SCP.countPrefix (\_ -> true) "" + , expected: 0 + } + assertEqual + { actual: SCP.countPrefix (\_ -> false) str + , expected: 0 + } + assertEqual + { actual: SCP.countPrefix (\_ -> true) str + , expected: 7 + } + assertEqual + { actual: SCP.countPrefix (\x -> fromEnum x < 0xFFFF) str + , expected: 4 + } + assertEqual + { actual: SCP.countPrefix (\x -> fromEnum x < 0xDC00) str + , expected: 1 + } log "indexOf" - assert $ indexOf (Pattern "") "" == Just 0 - assert $ indexOf (Pattern "") str == Just 0 - assert $ indexOf (Pattern str) str == Just 0 - assert $ indexOf (Pattern "a") str == Just 0 - assert $ indexOf (Pattern "\xDC00\xD800\xD800") str == Just 1 - assert $ indexOf (Pattern "\xD800") str == Just 2 - assert $ indexOf (Pattern "\xD800\xD800") str == Just 2 - assert $ indexOf (Pattern "\xD800\xD81A") str == Just 3 - assert $ indexOf (Pattern "\xD800\x16805") str == Just 3 - assert $ indexOf (Pattern "\x16805") str == Just 4 - assert $ indexOf (Pattern "\x16A06") str == Just 5 - assert $ indexOf (Pattern "z") str == Just 6 - assert $ indexOf (Pattern "\0") str == Nothing - assert $ indexOf (Pattern "\xD81A") str == Just 4 + assertEqual + { actual: SCP.indexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "") str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern str) str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "a") str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xDC00\xD800\xD800") str + , expected: Just 1 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.indexOf (Pattern 
"\xD800\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800\xD81A") str + , expected: Just 3 + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD800\x16805") str + , expected: Just 3 + } + assertEqual + { actual: SCP.indexOf (Pattern "\x16805") str + , expected: Just 4 + } + assertEqual + { actual: SCP.indexOf (Pattern "\x16A06") str + , expected: Just 5 + } + assertEqual + { actual: SCP.indexOf (Pattern "z") str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf (Pattern "\0") str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf (Pattern "\xD81A") str + , expected: Just 4 + } log "indexOf'" - assert $ indexOf' (Pattern "") 0 "" == Just 0 - assert $ indexOf' (Pattern str) 0 str == Just 0 - assert $ indexOf' (Pattern str) 1 str == Nothing - assert $ indexOf' (Pattern "a") 0 str == Just 0 - assert $ indexOf' (Pattern "a") 1 str == Nothing - assert $ indexOf' (Pattern "z") 0 str == Just 6 - assert $ indexOf' (Pattern "z") 1 str == Just 6 - assert $ indexOf' (Pattern "z") 2 str == Just 6 - assert $ indexOf' (Pattern "z") 3 str == Just 6 - assert $ indexOf' (Pattern "z") 4 str == Just 6 - assert $ indexOf' (Pattern "z") 5 str == Just 6 - assert $ indexOf' (Pattern "z") 6 str == Just 6 - assert $ indexOf' (Pattern "z") 7 str == Nothing + assertEqual + { actual: SCP.indexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' (Pattern str) 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' (Pattern str) 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf' (Pattern "a") 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.indexOf' (Pattern "a") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 0 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 1 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 2 str + , expected: Just 6 + } + 
assertEqual + { actual: SCP.indexOf' (Pattern "z") 3 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 4 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 5 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 6 str + , expected: Just 6 + } + assertEqual + { actual: SCP.indexOf' (Pattern "z") 7 str + , expected: Nothing + } log "lastIndexOf" - assert $ lastIndexOf (Pattern "") "" == Just 0 - assert $ lastIndexOf (Pattern "") str == Just 7 - assert $ lastIndexOf (Pattern str) str == Just 0 - assert $ lastIndexOf (Pattern "a") str == Just 0 - assert $ lastIndexOf (Pattern "\xDC00\xD800\xD800") str == Just 1 - assert $ lastIndexOf (Pattern "\xD800") str == Just 3 - assert $ lastIndexOf (Pattern "\xD800\xD800") str == Just 2 - assert $ lastIndexOf (Pattern "\xD800\xD81A") str == Just 3 - assert $ lastIndexOf (Pattern "\xD800\x16805") str == Just 3 - assert $ lastIndexOf (Pattern "\x16805") str == Just 4 - assert $ lastIndexOf (Pattern "\x16A06") str == Just 5 - assert $ lastIndexOf (Pattern "z") str == Just 6 - assert $ lastIndexOf (Pattern "\0") str == Nothing - assert $ lastIndexOf (Pattern "\xD81A") str == Just 5 + assertEqual + { actual: SCP.lastIndexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "") str + , expected: Just 7 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern str) str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "a") str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xDC00\xD800\xD800") str + , expected: Just 1 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800\xD800") str + , expected: Just 2 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD800\xD81A") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern 
"\xD800\x16805") str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\x16805") str + , expected: Just 4 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\x16A06") str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "z") str + , expected: Just 6 + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\0") str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf (Pattern "\xD81A") str + , expected: Just 5 + } log "lastIndexOf'" - assert $ lastIndexOf' (Pattern "") 0 "" == Just 0 - assert $ lastIndexOf' (Pattern str) 0 str == Just 0 - assert $ lastIndexOf' (Pattern str) 1 str == Just 0 - assert $ lastIndexOf' (Pattern "a") 0 str == Just 0 - assert $ lastIndexOf' (Pattern "a") 7 str == Just 0 - assert $ lastIndexOf' (Pattern "z") 0 str == Nothing - assert $ lastIndexOf' (Pattern "z") 1 str == Nothing - assert $ lastIndexOf' (Pattern "z") 2 str == Nothing - assert $ lastIndexOf' (Pattern "z") 3 str == Nothing - assert $ lastIndexOf' (Pattern "z") 4 str == Nothing - assert $ lastIndexOf' (Pattern "z") 5 str == Nothing - assert $ lastIndexOf' (Pattern "z") 6 str == Just 6 - assert $ lastIndexOf' (Pattern "z") 7 str == Just 6 - assert $ lastIndexOf' (Pattern "\xD800") 7 str == Just 3 - assert $ lastIndexOf' (Pattern "\xD800") 6 str == Just 3 - assert $ lastIndexOf' (Pattern "\xD800") 5 str == Just 3 - assert $ lastIndexOf' (Pattern "\xD800") 4 str == Just 3 - assert $ lastIndexOf' (Pattern "\xD800") 3 str == Just 3 - assert $ lastIndexOf' (Pattern "\xD800") 2 str == Just 2 - assert $ lastIndexOf' (Pattern "\xD800") 1 str == Nothing - assert $ lastIndexOf' (Pattern "\xD800") 0 str == Nothing - assert $ lastIndexOf' (Pattern "\x16A06") 7 str == Just 5 - assert $ lastIndexOf' (Pattern "\x16A06") 6 str == Just 5 - assert $ lastIndexOf' (Pattern "\x16A06") 5 str == Just 5 - assert $ lastIndexOf' (Pattern "\x16A06") 4 str == Nothing - assert $ lastIndexOf' (Pattern "\x16A06") 3 str == Nothing + 
assertEqual + { actual: SCP.lastIndexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern str) 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern str) 1 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") 0 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "a") 7 str + , expected: Just 0 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 0 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 2 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 3 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 4 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 5 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 6 str + , expected: Just 6 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "z") 7 str + , expected: Just 6 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 7 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 6 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 5 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 4 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 3 str + , expected: Just 3 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 2 str + , expected: Just 2 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 1 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\xD800") 0 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 7 str + , expected: Just 5 + } + assertEqual 
+ { actual: SCP.lastIndexOf' (Pattern "\x16A06") 6 str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 5 str + , expected: Just 5 + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 4 str + , expected: Nothing + } + assertEqual + { actual: SCP.lastIndexOf' (Pattern "\x16A06") 3 str + , expected: Nothing + } - log "length" - assert $ length "" == 0 - assert $ length "a" == 1 - assert $ length "ab" == 2 - assert $ length str == 7 + log "take" + assertEqual + { actual: SCP.take (-1) str + , expected: "" + } + assertEqual + { actual: SCP.take 0 str + , expected: "" + } + assertEqual + { actual: SCP.take 1 str + , expected: "a" + } + assertEqual + { actual: SCP.take 2 str + , expected: "a\xDC00" + } + assertEqual + { actual: SCP.take 3 str + , expected: "a\xDC00\xD800" + } + assertEqual + { actual: SCP.take 4 str + , expected: "a\xDC00\xD800\xD800" + } + assertEqual + { actual: SCP.take 5 str + , expected: "a\xDC00\xD800\xD800\x16805" + } + assertEqual + { actual: SCP.take 6 str + , expected: "a\xDC00\xD800\xD800\x16805\x16A06" + } + assertEqual + { actual: SCP.take 7 str + , expected: str + } + assertEqual + { actual: SCP.take 8 str + , expected: str + } - log "singleton" - assert $ (singleton <$> codePointFromInt 0x30) == Just "0" - assert $ (singleton <$> codePointFromInt 0x16805) == Just "\x16805" + log "takeWhile" + assertEqual + { actual: SCP.takeWhile (\_ -> true) str + , expected: str + } + assertEqual + { actual: SCP.takeWhile (\_ -> false) str + , expected: "" + } + assertEqual + { actual: SCP.takeWhile (\c -> fromEnum c < 0xFFFF) str + , expected: "a\xDC00\xD800\xD800" + } + assertEqual + { actual: SCP.takeWhile (\c -> fromEnum c < 0xDC00) str + , expected: "a" + } - log "splitAt" - let testSplitAt i s r = - assert $ case splitAt i s of - { before, after } -> - r.before == before && r.after == after - - testSplitAt 0 "" {before: "", after: "" } - testSplitAt 1 "" {before: "", after: "" } - testSplitAt 0 
"a" {before: "", after: "a"} - testSplitAt 1 "ab" {before: "a", after: "b"} - testSplitAt 3 "aabcc" {before: "aab", after: "cc"} - testSplitAt (-1) "abc" {before: "", after: "abc"} - testSplitAt 0 str {before: "", after: str} - testSplitAt 1 str {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06\&z"} - testSplitAt 2 str {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06\&z"} - testSplitAt 3 str {before: "a\xDC00\xD800", after: "\xD800\x16805\x16A06\&z"} - testSplitAt 4 str {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06\&z"} - testSplitAt 5 str {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06\&z"} - testSplitAt 6 str {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} - testSplitAt 7 str {before: str, after: ""} - testSplitAt 8 str {before: str, after: ""} + log "drop" + assertEqual + { actual: SCP.drop (-1) str + , expected: str + } + assertEqual + { actual: SCP.drop 0 str + , expected: str + } + assertEqual + { actual: SCP.drop 1 str + , expected: "\xDC00\xD800\xD800\x16805\x16A06\&z" + } + assertEqual + { actual: SCP.drop 2 str + , expected: "\xD800\xD800\x16805\x16A06\&z" + } + assertEqual + { actual: SCP.drop 3 str + , expected: "\xD800\x16805\x16A06\&z" + } + assertEqual + { actual: SCP.drop 4 str + , expected: "\x16805\x16A06\&z" + } + assertEqual + { actual: SCP.drop 5 str + , expected: "\x16A06\&z" + } + assertEqual + { actual: SCP.drop 6 str + , expected: "z" + } + assertEqual + { actual: SCP.drop 7 str + , expected: "" + } + assertEqual + { actual: SCP.drop 8 str + , expected: "" + } - log "take" - assert $ take (-1) str == "" - assert $ take 0 str == "" - assert $ take 1 str == "a" - assert $ take 2 str == "a\xDC00" - assert $ take 3 str == "a\xDC00\xD800" - assert $ take 4 str == "a\xDC00\xD800\xD800" - assert $ take 5 str == "a\xDC00\xD800\xD800\x16805" - assert $ take 6 str == "a\xDC00\xD800\xD800\x16805\x16A06" - assert $ take 7 str == str - assert $ take 8 str == str + log "dropWhile" + assertEqual + { actual: 
SCP.dropWhile (\_ -> true) str + , expected: "" + } + assertEqual + { actual: SCP.dropWhile (\_ -> false) str + , expected: str + } + assertEqual + { actual: SCP.dropWhile (\c -> fromEnum c < 0xFFFF) str + , expected: "\x16805\x16A06\&z" + } + assertEqual + { actual: SCP.dropWhile (\c -> fromEnum c < 0xDC00) str + , expected: "\xDC00\xD800\xD800\x16805\x16A06\&z" + } - log "takeWhile" - assert $ takeWhile (\_ -> true) str == str - assert $ takeWhile (\_ -> false) str == "" - assert $ takeWhile (\c -> codePointToInt c < 0xFFFF) str == "a\xDC00\xD800\xD800" - assert $ takeWhile (\c -> codePointToInt c < 0xDC00) str == "a" + log "splitAt" + assertEqual + { actual: SCP.splitAt 0 "" + , expected: {before: "", after: "" } + } + assertEqual + { actual: SCP.splitAt 1 "" + , expected: {before: "", after: "" } + } + assertEqual + { actual: SCP.splitAt 0 "a" + , expected: {before: "", after: "a"} + } + assertEqual + { actual: SCP.splitAt 1 "ab" + , expected: {before: "a", after: "b"} + } + assertEqual + { actual: SCP.splitAt 3 "aabcc" + , expected: {before: "aab", after: "cc"} + } + assertEqual + { actual: SCP.splitAt (-1) "abc" + , expected: {before: "", after: "abc"} + } + assertEqual + { actual: SCP.splitAt 0 str + , expected: {before: "", after: str} + } + assertEqual + { actual: SCP.splitAt 1 str + , expected: {before: "a", after: "\xDC00\xD800\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.splitAt 2 str + , expected: {before: "a\xDC00", after: "\xD800\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.splitAt 3 str + , expected: {before: "a\xDC00\xD800", after: "\xD800\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.splitAt 4 str + , expected: {before: "a\xDC00\xD800\xD800", after: "\x16805\x16A06\&z"} + } + assertEqual + { actual: SCP.splitAt 5 str + , expected: {before: "a\xDC00\xD800\xD800\x16805", after: "\x16A06\&z"} + } + assertEqual + { actual: SCP.splitAt 6 str + , expected: {before: "a\xDC00\xD800\xD800\x16805\x16A06", after: "z"} + } + 
assertEqual + { actual: SCP.splitAt 7 str + , expected: {before: str, after: ""} + } + assertEqual + { actual: SCP.splitAt 8 str + , expected: {before: str, after: ""} + } - log "uncons" - let testUncons s res = - assert $ case uncons s of - Nothing -> - isNothing res - Just { head, tail } -> - maybe false (\r -> - r.head == codePointToInt head && r.tail == tail) res - - testUncons str $ Just {head: 0x61, tail: "\xDC00\xD800\xD800\x16805\x16A06\&z"} - testUncons (drop 1 str) $ Just {head: 0xDC00, tail: "\xD800\xD800\x16805\x16A06\&z"} - testUncons (drop 2 str) $ Just {head: 0xD800, tail: "\xD800\x16805\x16A06\&z"} - testUncons (drop 3 str) $ Just {head: 0xD800, tail: "\x16805\x16A06\&z"} - testUncons (drop 4 str) $ Just {head: 0x16805, tail: "\x16A06\&z"} - testUncons (drop 5 str) $ Just {head: 0x16A06, tail: "z"} - testUncons (drop 6 str) $ Just {head: 0x7A, tail: ""} - testUncons "" Nothing +cp :: Int -> SCP.CodePoint +cp = unsafePartial fromJust <<< toEnum diff --git a/test/Test/Data/String/CodeUnits.purs b/test/Test/Data/String/CodeUnits.purs new file mode 100644 index 0000000..eb35d15 --- /dev/null +++ b/test/Test/Data/String/CodeUnits.purs @@ -0,0 +1,456 @@ +module Test.Data.String.CodeUnits (testStringCodeUnits) where + +import Prelude + +import Data.Enum (fromEnum) +import Data.Maybe (Maybe(..), isNothing) +import Data.String.CodeUnits as SCU +import Data.String.Pattern (Pattern(..)) +import Effect (Effect) +import Effect.Console (log) +import Test.Assert (assert, assertEqual) + +testStringCodeUnits :: Effect Unit +testStringCodeUnits = do + log "charAt" + assertEqual + { actual: SCU.charAt 0 "" + , expected: Nothing + } + assertEqual + { actual: SCU.charAt 0 "a" + , expected: Just 'a' + } + assertEqual + { actual: SCU.charAt 1 "a" + , expected: Nothing + } + assertEqual + { actual: SCU.charAt 0 "ab" + , expected: Just 'a' + } + assertEqual + { actual: SCU.charAt 1 "ab" + , expected: Just 'b' + } + assertEqual + { actual: SCU.charAt 2 "ab" + , expected: 
Nothing + } + + log "singleton" + assertEqual + { actual: SCU.singleton 'a' + , expected: "a" + } + + log "charCodeAt" + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "") + , expected: Nothing + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "a") + , expected: Just 97 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 1 "a") + , expected: Nothing + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 0 "ab") + , expected: Just 97 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 1 "ab") + , expected: Just 98 + } + assertEqual + { actual: (fromEnum <$> SCU.charAt 2 "ab") + , expected: Nothing + } + + log "toChar" + assertEqual + { actual: SCU.toChar "" + , expected: Nothing + } + assertEqual + { actual: SCU.toChar "a" + , expected: Just 'a' + } + assertEqual + { actual: SCU.toChar "ab" + , expected: Nothing + } + + log "uncons" + assert $ isNothing (SCU.uncons "") + assertEqual + { actual: SCU.uncons "a" + , expected: Just { head: 'a', tail: "" } + } + assertEqual + { actual: SCU.uncons "ab" + , expected: Just { head: 'a', tail: "b" } + } + + log "takeWhile" + assertEqual + { actual: SCU.takeWhile (\c -> true) "abc" + , expected: "abc" + } + assertEqual + { actual: SCU.takeWhile (\c -> false) "abc" + , expected: "" + } + assertEqual + { actual: SCU.takeWhile (\c -> c /= 'b') "aabbcc" + , expected: "aa" + } + + log "dropWhile" + assertEqual + { actual: SCU.dropWhile (\c -> true) "abc" + , expected: "" + } + assertEqual + { actual: SCU.dropWhile (\c -> false) "abc" + , expected: "abc" + } + assertEqual + { actual: SCU.dropWhile (\c -> c /= 'b') "aabbcc" + , expected: "bbcc" + } + + log "fromCharArray" + assertEqual + { actual: SCU.fromCharArray [] + , expected: "" + } + assertEqual + { actual: SCU.fromCharArray ['a', 'b'] + , expected: "ab" + } + + log "indexOf" + assertEqual + { actual: SCU.indexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf (Pattern "") "abcd" + , expected: Just 0 + } + assertEqual + { actual: 
SCU.indexOf (Pattern "bc") "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf (Pattern "cb") "abcd" + , expected: Nothing + } + + log "indexOf'" + assertEqual + { actual: SCU.indexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") (-1) "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 0 "ab" + , expected: Just 0 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 1 "ab" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 2 "ab" + , expected: Just 2 + } + assertEqual + { actual: SCU.indexOf' (Pattern "") 3 "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.indexOf' (Pattern "bc") 0 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "bc") 1 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.indexOf' (Pattern "bc") 2 "abcd" + , expected: Nothing + } + assertEqual + { actual: SCU.indexOf' (Pattern "cb") 0 "abcd" + , expected: Nothing + } + + log "lastIndexOf" + assertEqual + { actual: SCU.lastIndexOf (Pattern "") "" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "") "abcd" + , expected: Just 4 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "bc") "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf (Pattern "cb") "abcd" + , expected: Nothing + } + + log "lastIndexOf'" + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 0 "" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") (-1) "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 0 "ab" + , expected: Just 0 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 1 "ab" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 2 "ab" + , expected: Just 2 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "") 3 "ab" + , expected: Nothing + } + assertEqual + { actual: SCU.lastIndexOf' 
(Pattern "bc") 0 "abcd" + , expected: Nothing + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "bc") 1 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "bc") 2 "abcd" + , expected: Just 1 + } + assertEqual + { actual: SCU.lastIndexOf' (Pattern "cb") 0 "abcd" + , expected: Nothing + } + + log "length" + assertEqual + { actual: SCU.length "" + , expected: 0 + } + assertEqual + { actual: SCU.length "a" + , expected: 1 + } + assertEqual + { actual: SCU.length "ab" + , expected: 2 + } + + log "take" + assertEqual + { actual: SCU.take 0 "ab" + , expected: "" + } + assertEqual + { actual: SCU.take 1 "ab" + , expected: "a" + } + assertEqual + { actual: SCU.take 2 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.take 3 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.take (-1) "ab" + , expected: "" + } + + log "takeRight" + assertEqual + { actual: SCU.takeRight 0 "ab" + , expected: "" + } + assertEqual + { actual: SCU.takeRight 1 "ab" + , expected: "b" + } + assertEqual + { actual: SCU.takeRight 2 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.takeRight 3 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.takeRight (-1) "ab" + , expected: "" + } + + log "drop" + assertEqual + { actual: SCU.drop 0 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.drop 1 "ab" + , expected: "b" + } + assertEqual + { actual: SCU.drop 2 "ab" + , expected: "" + } + assertEqual + { actual: SCU.drop 3 "ab" + , expected: "" + } + assertEqual + { actual: SCU.drop (-1) "ab" + , expected: "ab" + } + + log "dropRight" + assertEqual + { actual: SCU.dropRight 0 "ab" + , expected: "ab" + } + assertEqual + { actual: SCU.dropRight 1 "ab" + , expected: "a" + } + assertEqual + { actual: SCU.dropRight 2 "ab" + , expected: "" + } + assertEqual + { actual: SCU.dropRight 3 "ab" + , expected: "" + } + assertEqual + { actual: SCU.dropRight (-1) "ab" + , expected: "ab" + } + + log "countPrefix" + assertEqual + { actual: 
SCU.countPrefix (_ == 'a') "" + , expected: 0 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "ab" + , expected: 1 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "aaab" + , expected: 3 + } + assertEqual + { actual: SCU.countPrefix (_ == 'a') "abaa" + , expected: 1 + } + + log "splitAt" + assertEqual + { actual: SCU.splitAt 1 "" + , expected: {before: "", after: ""} + } + assertEqual + { actual: SCU.splitAt 0 "a" + , expected: {before: "", after: "a"} + } + assertEqual + { actual: SCU.splitAt 1 "a" + , expected: {before: "a", after: ""} + } + assertEqual + { actual: SCU.splitAt 1 "ab" + , expected: {before: "a", after: "b"} + } + assertEqual + { actual: SCU.splitAt 3 "aabcc" + , expected: {before: "aab", after: "cc"} + } + assertEqual + { actual: SCU.splitAt (-1) "abc" + , expected: {before: "", after: "abc"} + } + assertEqual + { actual: SCU.splitAt 10 "Hi" + , expected: {before: "Hi", after: ""} + } + + log "toCharArray" + assertEqual + { actual: SCU.toCharArray "" + , expected: [] + } + assertEqual + { actual: SCU.toCharArray "a" + , expected: ['a'] + } + assertEqual + { actual: SCU.toCharArray "ab" + , expected: ['a', 'b'] + } + + log "slice" + assertEqual + { actual: SCU.slice 0 0 "purescript" + , expected: Just "" + } + assertEqual + { actual: SCU.slice 0 1 "purescript" + , expected: Just "p" + } + assertEqual + { actual: SCU.slice 3 6 "purescript" + , expected: Just "esc" + } + assertEqual + { actual: SCU.slice (-4) (-1) "purescript" + , expected: Just "rip" + } + assertEqual + { actual: SCU.slice (-4) 3 "purescript" + , expected: Nothing -- b' > e' + } + assertEqual + { actual: SCU.slice 1000 3 "purescript" + , expected: Nothing -- b' > e' (subsumes b > l) + } + assertEqual + { actual: SCU.slice 2 (-15) "purescript" + , expected: Nothing -- e' < 0 + } + assertEqual + { actual: SCU.slice (-15) 9 "purescript" + , expected: Nothing -- b' < 0 + } + assertEqual + { actual: SCU.slice 3 1000 "purescript" + , expected: Nothing -- e > l + } diff 
--git a/test/Test/Data/String/NonEmpty.purs b/test/Test/Data/String/NonEmpty.purs index e1f0983..59a8f22 100644 --- a/test/Test/Data/String/NonEmpty.purs +++ b/test/Test/Data/String/NonEmpty.purs @@ -1,257 +1,220 @@ module Test.Data.String.NonEmpty (testNonEmptyString) where -import Data.String.NonEmpty +import Prelude import Data.Array.NonEmpty as NEA -import Data.Array.Partial as AP -import Data.Foldable (class Foldable, foldl) -import Data.Maybe (Maybe(..), fromJust, isNothing, maybe) -import Data.Semigroup.Foldable (class Foldable1, foldMap1Default) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.NonEmpty (Pattern(..), nes) +import Data.String.NonEmpty as NES +import Data.Symbol (SProxy(..)) import Effect (Effect) import Effect.Console (log) import Partial.Unsafe (unsafePartial) -import Prelude (class Functor, Ordering(..), Unit, append, discard, negate, not, ($), (&&), (/=), (==)) -import Test.Assert (assert) +import Test.Assert (assert, assertEqual) testNonEmptyString :: Effect Unit testNonEmptyString = do - log "fromString" - assert $ fromString "" == Nothing - assert $ fromString "hello" == Just (nes "hello") - - log "fromCharArray" - assert $ fromCharArray [] == Nothing - assert $ fromCharArray ['a', 'b'] == Just (nes "ab") - - log "fromNonEmptyCharArray" - assert $ fromNonEmptyCharArray (NEA.singleton 'b') == singleton 'b' - - log "singleton" - assert $ singleton 'a' == nes "a" - - log "cons" - assert $ cons 'a' "bc" == nes "abc" - assert $ cons 'a' "" == nes "a" - - log "snoc" - assert $ snoc 'c' "ab" == nes "abc" - assert $ snoc 'a' "" == nes "a" - - log "fromFoldable1" - assert $ fromFoldable1 (NEA ['a']) == nes "a" - assert $ fromFoldable1 (NEA ['a', 'b', 'c']) == nes "abc" - - log "charAt" - assert $ charAt 0 (nes "a") == Just 'a' - assert $ charAt 1 (nes "a") == Nothing - assert $ charAt 0 (nes "ab") == Just 'a' - assert $ charAt 1 (nes "ab") == Just 'b' - assert $ charAt 2 (nes "ab") == Nothing - assert $ charAt 2 (nes "Hello") == Just 
'l' - assert $ charAt 10 (nes "Hello") == Nothing - - log "charCodeAt" - assert $ charCodeAt 0 (nes "a") == Just 97 - assert $ charCodeAt 1 (nes "a") == Nothing - assert $ charCodeAt 0 (nes "ab") == Just 97 - assert $ charCodeAt 1 (nes "ab") == Just 98 - assert $ charCodeAt 2 (nes "ab") == Nothing - assert $ charCodeAt 2 (nes "5 €") == Just 0x20AC - assert $ charCodeAt 10 (nes "5 €") == Nothing - log "toChar" - assert $ toChar (nes "a") == Just 'a' - assert $ toChar (nes "ab") == Nothing - - log "toCharArray" - assert $ toCharArray (nes "a") == ['a'] - assert $ toCharArray (nes "ab") == ['a', 'b'] - assert $ toCharArray (nes "Hello☺\n") == ['H','e','l','l','o','☺','\n'] - - log "toNonEmptyCharArray" - assert $ toNonEmptyCharArray (nes "ab") - == unsafePartial fromJust (NEA.fromArray ['a', 'b']) + log "fromString" + assertEqual + { actual: NES.fromString "" + , expected: Nothing + } + assertEqual + { actual: NES.fromString "hello" + , expected: Just (nes (SProxy :: SProxy "hello")) + } + + log "toString" + assertEqual + { actual: (NES.toString <$> NES.fromString "hello") + , expected: Just "hello" + } log "appendString" - assert $ appendString (nes "Hello") " world" == nes "Hello world" - assert $ appendString (nes "Hello") "" == nes "Hello" + assertEqual + { actual: NES.appendString (nes (SProxy :: SProxy "Hello")) " world" + , expected: nes (SProxy :: SProxy "Hello world") + } + assertEqual + { actual: NES.appendString (nes (SProxy :: SProxy "Hello")) "" + , expected: nes (SProxy :: SProxy "Hello") + } log "prependString" - assert $ prependString "be" (nes "fore") == nes "before" - assert $ prependString "" (nes "fore") == nes "fore" - - log "uncons" - assert - let m = uncons (nes "a") - in m.head == 'a' && m.tail == Nothing - assert $ - let m = uncons (nes "Hello World") - in m.head == 'H' && m.tail == Just (nes "ello World") - - log "takeWhile" - assert $ takeWhile (\c -> true) (nes "abc") == Just (nes "abc") - assert $ takeWhile (\c -> false) (nes "abc") == 
Nothing - assert $ takeWhile (\c -> c /= 'b') (nes "aabbcc") == Just (nes "aa") - assert $ takeWhile (_ /= ':') (nes "http://purescript.org") == Just (nes "http") - assert $ takeWhile (_ == 'a') (nes "xyz") == Nothing - - log "dropWhile" - assert $ dropWhile (\c -> true) (nes "abc") == Nothing - assert $ dropWhile (\c -> false) (nes "abc") == Just (nes "abc") - assert $ dropWhile (\c -> c /= 'b') (nes "aabbcc") == Just (nes "bbcc") - assert $ dropWhile (_ /= '.') (nes "Test.purs") == Just (nes ".purs") - - log "stripPrefix" - assert $ stripPrefix (Pattern "") (nes "abc") == Just (nes "abc") - assert $ stripPrefix (Pattern "a") (nes "abc") == Just (nes "bc") - assert $ stripPrefix (Pattern "abc") (nes "abc") == Nothing - assert $ stripPrefix (Pattern "!") (nes "abc") == Nothing - assert $ stripPrefix (Pattern "http:") (nes "http://purescript.org") == Just (nes "//purescript.org") - assert $ stripPrefix (Pattern "http:") (nes "https://purescript.org") == Nothing - assert $ stripPrefix (Pattern "Hello!") (nes "Hello!") == Nothing - - log "stripSuffix" - assert $ stripSuffix (Pattern ".exe") (nes "purs.exe") == Just (nes "purs") - assert $ stripSuffix (Pattern ".exe") (nes "purs") == Nothing - assert $ stripSuffix (Pattern "Hello!") (nes "Hello!") == Nothing + assertEqual + { actual: NES.prependString "be" (nes (SProxy :: SProxy "fore")) + , expected: nes (SProxy :: SProxy "before") + } + assertEqual + { actual: NES.prependString "" (nes (SProxy :: SProxy "fore")) + , expected: nes (SProxy :: SProxy "fore") + } log "contains" - assert $ contains (Pattern "") (nes "abcd") - assert $ contains (Pattern "bc") (nes "abcd") - assert $ not (contains (Pattern "cb") (nes "abcd")) - assert $ contains (Pattern "needle") (nes "haystack with needle") == true - assert $ contains (Pattern "needle") (nes "haystack") == false - - log "indexOf" - assert $ indexOf (Pattern "") (nes "abcd") == Just 0 - assert $ indexOf (Pattern "bc") (nes "abcd") == Just 1 - assert $ indexOf (Pattern 
"cb") (nes "abcd") == Nothing - - log "indexOf'" - assert $ indexOf' (Pattern "") (-1) (nes "ab") == Nothing - assert $ indexOf' (Pattern "") 0 (nes "ab") == Just 0 - assert $ indexOf' (Pattern "") 1 (nes "ab") == Just 1 - assert $ indexOf' (Pattern "") 2 (nes "ab") == Just 2 - assert $ indexOf' (Pattern "") 3 (nes "ab") == Nothing - assert $ indexOf' (Pattern "bc") 0 (nes "abcd") == Just 1 - assert $ indexOf' (Pattern "bc") 1 (nes "abcd") == Just 1 - assert $ indexOf' (Pattern "bc") 2 (nes "abcd") == Nothing - assert $ indexOf' (Pattern "cb") 0 (nes "abcd") == Nothing - - log "lastIndexOf" - assert $ lastIndexOf (Pattern "") (nes "abcd") == Just 4 - assert $ lastIndexOf (Pattern "bc") (nes "abcd") == Just 1 - assert $ lastIndexOf (Pattern "cb") (nes "abcd") == Nothing - - log "lastIndexOf'" - assert $ lastIndexOf' (Pattern "") (-1) (nes "ab") == Nothing - assert $ lastIndexOf' (Pattern "") 0 (nes "ab") == Just 0 - assert $ lastIndexOf' (Pattern "") 1 (nes "ab") == Just 1 - assert $ lastIndexOf' (Pattern "") 2 (nes "ab") == Just 2 - assert $ lastIndexOf' (Pattern "") 3 (nes "ab") == Nothing - assert $ lastIndexOf' (Pattern "bc") 0 (nes "abcd") == Nothing - assert $ lastIndexOf' (Pattern "bc") 1 (nes "abcd") == Just 1 - assert $ lastIndexOf' (Pattern "bc") 2 (nes "abcd") == Just 1 - assert $ lastIndexOf' (Pattern "cb") 0 (nes "abcd") == Nothing - - log "length" - assert $ length (nes "a") == 1 - assert $ length (nes "ab") == 2 + assert $ NES.contains (Pattern "") (nes (SProxy :: SProxy "abcd")) + assert $ NES.contains (Pattern "bc") (nes (SProxy :: SProxy "abcd")) + assert $ not NES.contains (Pattern "cb") (nes (SProxy :: SProxy "abcd")) + assert $ NES.contains (Pattern "needle") (nes (SProxy :: SProxy "haystack with needle")) + assert $ not NES.contains (Pattern "needle") (nes (SProxy :: SProxy "haystack")) log "localeCompare" - assert $ localeCompare (nes "a") (nes "a") == EQ - assert $ localeCompare (nes "a") (nes "b") == LT - assert $ localeCompare (nes "b") 
(nes "a") == GT + assertEqual + { actual: NES.localeCompare (nes (SProxy :: SProxy "a")) (nes (SProxy :: SProxy "a")) + , expected: EQ + } + assertEqual + { actual: NES.localeCompare (nes (SProxy :: SProxy "a")) (nes (SProxy :: SProxy "b")) + , expected: LT + } + assertEqual + { actual: NES.localeCompare (nes (SProxy :: SProxy "b")) (nes (SProxy :: SProxy "a")) + , expected: GT + } log "replace" - assert $ replace (Pattern "b") (NonEmptyReplacement (nes "!")) (nes "abc") == nes "a!c" - assert $ replace (Pattern "b") (NonEmptyReplacement (nes "!")) (nes "abbc") == nes "a!bc" - assert $ replace (Pattern "d") (NonEmptyReplacement (nes "!")) (nes "abc") == nes "abc" + assertEqual + { actual: NES.replace (Pattern "b") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "abc")) + , expected: nes (SProxy :: SProxy "a!c") + } + assertEqual + { actual: NES.replace (Pattern "b") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "abbc")) + , expected: nes (SProxy :: SProxy "a!bc") + } + assertEqual + { actual: NES.replace (Pattern "d") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "abc")) + , expected: nes (SProxy :: SProxy "abc") + } log "replaceAll" - assert $ replaceAll (Pattern "[b]") (NonEmptyReplacement (nes "!")) (nes "a[b]c") == nes "a!c" - assert $ replaceAll (Pattern "[b]") (NonEmptyReplacement (nes "!")) (nes "a[b]c[b]") == nes "a!c!" 
- assert $ replaceAll (Pattern "x") (NonEmptyReplacement (nes "!")) (nes "abc") == nes "abc" - - log "take" - assert $ take 0 (nes "ab") == Nothing - assert $ take 1 (nes "ab") == Just (nes "a") - assert $ take 2 (nes "ab") == Just (nes "ab") - assert $ take 3 (nes "ab") == Just (nes "ab") - assert $ take (-1) (nes "ab") == Nothing - - log "takeRight" - assert $ takeRight 0 (nes "ab") == Nothing - assert $ takeRight 1 (nes "ab") == Just (nes "b") - assert $ takeRight 2 (nes "ab") == Just (nes "ab") - assert $ takeRight 3 (nes "ab") == Just (nes "ab") - assert $ takeRight (-1) (nes "ab") == Nothing + assertEqual + { actual: NES.replaceAll (Pattern "[b]") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "a[b]c")) + , expected: nes (SProxy :: SProxy "a!c") + } + assertEqual + { actual: NES.replaceAll (Pattern "[b]") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "a[b]c[b]")) + , expected: nes (SProxy :: SProxy "a!c!") + } + assertEqual + { actual: NES.replaceAll (Pattern "x") (NES.NonEmptyReplacement (nes (SProxy :: SProxy "!"))) (nes (SProxy :: SProxy "abc")) + , expected: nes (SProxy :: SProxy "abc") + } - log "drop" - assert $ drop 0 (nes "ab") == Just (nes "ab") - assert $ drop 1 (nes "ab") == Just (nes "b") - assert $ drop 2 (nes "ab") == Nothing - assert $ drop 3 (nes "ab") == Nothing - assert $ drop (-1) (nes "ab") == Just (nes "ab") - - log "dropRight" - assert $ dropRight 0 (nes "ab") == Just (nes "ab") - assert $ dropRight 1 (nes "ab") == Just (nes "a") - assert $ dropRight 2 (nes "ab") == Nothing - assert $ dropRight 3 (nes "ab") == Nothing - assert $ dropRight (-1) (nes "ab") == Just (nes "ab") - - log "countPrefix" - assert $ countPrefix (_ == 'a') (nes "ab") == 1 - assert $ countPrefix (_ == 'a') (nes "aaab") == 3 - assert $ countPrefix (_ == 'a') (nes "abaa") == 1 - assert $ countPrefix (_ == 'c') (nes "abaa") == 0 + log "stripPrefix" + assertEqual + { actual: NES.stripPrefix (Pattern "") (nes 
(SProxy :: SProxy "abc")) + , expected: Just (nes (SProxy :: SProxy "abc")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "a") (nes (SProxy :: SProxy "abc")) + , expected: Just (nes (SProxy :: SProxy "bc")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "abc") (nes (SProxy :: SProxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "!") (nes (SProxy :: SProxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "http:") (nes (SProxy :: SProxy "http://purescript.org")) + , expected: Just (nes (SProxy :: SProxy "//purescript.org")) + } + assertEqual + { actual: NES.stripPrefix (Pattern "http:") (nes (SProxy :: SProxy "https://purescript.org")) + , expected: Nothing + } + assertEqual + { actual: NES.stripPrefix (Pattern "Hello!") (nes (SProxy :: SProxy "Hello!")) + , expected: Nothing + } - log "splitAt" - let - testSplitAt i str res = - assert $ case splitAt i str of - { before, after } -> res.before == before && res.after == after - testSplitAt 0 (nes "a") { before: Nothing, after: Just (nes "a") } - testSplitAt 1 (nes "ab") { before: Just (nes "a"), after: Just (nes "b") } - testSplitAt 3 (nes "aabcc") { before: Just (nes "aab"), after: Just (nes "cc") } - testSplitAt (-1) (nes "abc") { before: Nothing, after: Just (nes "abc") } + log "stripSuffix" + assertEqual + { actual: NES.stripSuffix (Pattern ".exe") (nes (SProxy :: SProxy "purs.exe")) + , expected: Just (nes (SProxy :: SProxy "purs")) + } + assertEqual + { actual: NES.stripSuffix (Pattern ".exe") (nes (SProxy :: SProxy "purs")) + , expected: Nothing + } + assertEqual + { actual: NES.stripSuffix (Pattern "Hello!") (nes (SProxy :: SProxy "Hello!")) + , expected: Nothing + } log "toLower" - assert $ toLower (nes "bAtMaN") == nes "batman" + assertEqual + { actual: NES.toLower (nes (SProxy :: SProxy "bAtMaN")) + , expected: nes (SProxy :: SProxy "batman") + } log "toUpper" - assert $ toUpper (nes "bAtMaN") == nes "BATMAN" + 
assertEqual + { actual: NES.toUpper (nes (SProxy :: SProxy "bAtMaN")) + , expected: nes (SProxy :: SProxy "BATMAN") + } log "trim" - assert $ trim (nes " abc ") == Just (nes "abc") - assert $ trim (nes " \n") == Nothing + assertEqual + { actual: NES.trim (nes (SProxy :: SProxy " abc ")) + , expected: Just (nes (SProxy :: SProxy "abc")) + } + assertEqual + { actual: NES.trim (nes (SProxy :: SProxy " \n")) + , expected: Nothing + } log "joinWith" - assert $ joinWith "" [] == "" - assert $ joinWith "" [nes "a", nes "b"] == "ab" - assert $ joinWith "--" [nes "a", nes "b", nes "c"] == "a--b--c" + assertEqual + { actual: NES.joinWith "" [] + , expected: "" + } + assertEqual + { actual: NES.joinWith "" [nes (SProxy :: SProxy "a"), nes (SProxy :: SProxy "b")] + , expected: "ab" + } + assertEqual + { actual: NES.joinWith "--" [nes (SProxy :: SProxy "a"), nes (SProxy :: SProxy "b"), nes (SProxy :: SProxy "c")] + , expected: "a--b--c" + } log "join1With" - assert $ join1With "" (NEA [nes "a", nes "b"]) == nes "ab" - assert $ join1With "--" (NEA [nes "a", nes "b", nes "c"]) == nes "a--b--c" - assert $ join1With ", " (NEA [nes "apple", nes "banana"]) == nes "apple, banana" - assert $ join1With "" (NEA [nes "apple", nes "banana"]) == nes "applebanana" + assertEqual + { actual: NES.join1With "" (nea [nes (SProxy :: SProxy "a"), nes (SProxy :: SProxy "b")]) + , expected: nes (SProxy :: SProxy "ab") + } + assertEqual + { actual: NES.join1With "--" (nea [nes (SProxy :: SProxy "a"), nes (SProxy :: SProxy "b"), nes (SProxy :: SProxy "c")]) + , expected: nes (SProxy :: SProxy "a--b--c") + } + assertEqual + { actual: NES.join1With ", " (nea [nes (SProxy :: SProxy "apple"), nes (SProxy :: SProxy "banana")]) + , expected: nes (SProxy :: SProxy "apple, banana") + } + assertEqual + { actual: NES.join1With "" (nea [nes (SProxy :: SProxy "apple"), nes (SProxy :: SProxy "banana")]) + , expected: nes (SProxy :: SProxy "applebanana") + } log "joinWith1" - assert $ joinWith1 (nes " ") (NEA ["a", 
"b"]) == nes "a b" - assert $ joinWith1 (nes "--") (NEA ["a", "b", "c"]) == nes "a--b--c" - assert $ joinWith1 (nes ", ") (NEA ["apple", "banana"]) == nes "apple, banana" - assert $ joinWith1 (nes "/") (NEA ["a", "b", "", "c", ""]) == nes "a/b//c/" - -nes :: String -> NonEmptyString -nes = unsafePartial unsafeFromString - -newtype NEA a = NEA (Array a) - -derive newtype instance functorNEA :: Functor NEA -derive newtype instance foldableNEA :: Foldable NEA - -instance foldable1NEA :: Foldable1 NEA where - foldMap1 a = foldMap1Default a - fold1 (NEA as) = foldl append (unsafePartial AP.head as) (unsafePartial AP.tail as) + assertEqual + { actual: NES.joinWith1 (nes (SProxy :: SProxy " ")) (nea ["a", "b"]) + , expected: nes (SProxy :: SProxy "a b") + } + assertEqual + { actual: NES.joinWith1 (nes (SProxy :: SProxy "--")) (nea ["a", "b", "c"]) + , expected: nes (SProxy :: SProxy "a--b--c") + } + assertEqual + { actual: NES.joinWith1 (nes (SProxy :: SProxy ", ")) (nea ["apple", "banana"]) + , expected: nes (SProxy :: SProxy "apple, banana") + } + assertEqual + { actual: NES.joinWith1 (nes (SProxy :: SProxy "/")) (nea ["a", "b", "", "c", ""]) + , expected: nes (SProxy :: SProxy "a/b//c/") + } + +nea :: Array ~> NEA.NonEmptyArray +nea = unsafePartial fromJust <<< NEA.fromArray diff --git a/test/Test/Data/String/NonEmpty/CodeUnits.purs b/test/Test/Data/String/NonEmpty/CodeUnits.purs new file mode 100644 index 0000000..fee9b51 --- /dev/null +++ b/test/Test/Data/String/NonEmpty/CodeUnits.purs @@ -0,0 +1,450 @@ +module Test.Data.String.NonEmpty.CodeUnits (testNonEmptyStringCodeUnits) where + +import Prelude + +import Data.Array.NonEmpty as NEA +import Data.Enum (fromEnum) +import Data.Maybe (Maybe(..), fromJust) +import Data.String.NonEmpty (Pattern(..), nes) +import Data.String.NonEmpty.CodeUnits as NESCU +import Data.Symbol (SProxy(..)) +import Effect (Effect) +import Effect.Console (log) +import Partial.Unsafe (unsafePartial) +import Test.Assert (assertEqual) + 
+testNonEmptyStringCodeUnits :: Effect Unit +testNonEmptyStringCodeUnits = do + + log "fromCharArray" + assertEqual + { actual: NESCU.fromCharArray [] + , expected: Nothing + } + assertEqual + { actual: NESCU.fromCharArray ['a', 'b'] + , expected: Just (nes (SProxy :: SProxy "ab")) + } + + log "fromNonEmptyCharArray" + assertEqual + { actual: NESCU.fromNonEmptyCharArray (NEA.singleton 'b') + , expected: NESCU.singleton 'b' + } + + log "singleton" + assertEqual + { actual: NESCU.singleton 'a' + , expected: nes (SProxy :: SProxy "a") + } + + log "cons" + assertEqual + { actual: NESCU.cons 'a' "bc" + , expected: nes (SProxy :: SProxy "abc") + } + assertEqual + { actual: NESCU.cons 'a' "" + , expected: nes (SProxy :: SProxy "a") + } + + log "snoc" + assertEqual + { actual: NESCU.snoc 'c' "ab" + , expected: nes (SProxy :: SProxy "abc") + } + assertEqual + { actual: NESCU.snoc 'a' "" + , expected: nes (SProxy :: SProxy "a") + } + + log "fromFoldable1" + assertEqual + { actual: NESCU.fromFoldable1 (nea ['a']) + , expected: nes (SProxy :: SProxy "a") + } + assertEqual + { actual: NESCU.fromFoldable1 (nea ['a', 'b', 'c']) + , expected: nes (SProxy :: SProxy "abc") + } + + log "charAt" + assertEqual + { actual: NESCU.charAt 0 (nes (SProxy :: SProxy "a")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.charAt 1 (nes (SProxy :: SProxy "a")) + , expected: Nothing + } + assertEqual + { actual: NESCU.charAt 0 (nes (SProxy :: SProxy "ab")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.charAt 1 (nes (SProxy :: SProxy "ab")) + , expected: Just 'b' + } + assertEqual + { actual: NESCU.charAt 2 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.charAt 2 (nes (SProxy :: SProxy "Hello")) + , expected: Just 'l' + } + assertEqual + { actual: NESCU.charAt 10 (nes (SProxy :: SProxy "Hello")) + , expected: Nothing + } + + log "charCodeAt" + assertEqual + { actual: fromEnum <$> NESCU.charAt 0 (nes (SProxy :: SProxy "a")) + , 
expected: Just 97 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 1 (nes (SProxy :: SProxy "a")) + , expected: Nothing + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 0 (nes (SProxy :: SProxy "ab")) + , expected: Just 97 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 1 (nes (SProxy :: SProxy "ab")) + , expected: Just 98 + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 2 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 2 (nes (SProxy :: SProxy "5 €")) + , expected: Just 0x20AC + } + assertEqual + { actual: fromEnum <$> NESCU.charAt 10 (nes (SProxy :: SProxy "5 €")) + , expected: Nothing + } + + log "toChar" + assertEqual + { actual: NESCU.toChar (nes (SProxy :: SProxy "a")) + , expected: Just 'a' + } + assertEqual + { actual: NESCU.toChar (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + + log "toCharArray" + assertEqual + { actual: NESCU.toCharArray (nes (SProxy :: SProxy "a")) + , expected: ['a'] + } + assertEqual + { actual: NESCU.toCharArray (nes (SProxy :: SProxy "ab")) + , expected: ['a', 'b'] + } + assertEqual + { actual: NESCU.toCharArray (nes (SProxy :: SProxy "Hello☺\n")) + , expected: ['H','e','l','l','o','☺','\n'] + } + + log "toNonEmptyCharArray" + assertEqual + { actual: NESCU.toNonEmptyCharArray (nes (SProxy :: SProxy "ab")) + , expected: nea ['a', 'b'] + } + + log "uncons" + assertEqual + { actual: NESCU.uncons (nes (SProxy :: SProxy "a")) + , expected: { head: 'a', tail: Nothing } + } + assertEqual + { actual: NESCU.uncons (nes (SProxy :: SProxy "Hello World")) + , expected: { head: 'H', tail: Just (nes (SProxy :: SProxy "ello World")) } + } + + log "takeWhile" + assertEqual + { actual: NESCU.takeWhile (\c -> true) (nes (SProxy :: SProxy "abc")) + , expected: Just (nes (SProxy :: SProxy "abc")) + } + assertEqual + { actual: NESCU.takeWhile (\c -> false) (nes (SProxy :: SProxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NESCU.takeWhile 
(\c -> c /= 'b') (nes (SProxy :: SProxy "aabbcc")) + , expected: Just (nes (SProxy :: SProxy "aa")) + } + assertEqual + { actual: NESCU.takeWhile (_ /= ':') (nes (SProxy :: SProxy "http://purescript.org")) + , expected: Just (nes (SProxy :: SProxy "http")) + } + assertEqual + { actual: NESCU.takeWhile (_ == 'a') (nes (SProxy :: SProxy "xyz")) + , expected: Nothing + } + + log "dropWhile" + assertEqual + { actual: NESCU.dropWhile (\c -> true) (nes (SProxy :: SProxy "abc")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropWhile (\c -> false) (nes (SProxy :: SProxy "abc")) + , expected: Just (nes (SProxy :: SProxy "abc")) + } + assertEqual + { actual: NESCU.dropWhile (\c -> c /= 'b') (nes (SProxy :: SProxy "aabbcc")) + , expected: Just (nes (SProxy :: SProxy "bbcc")) + } + assertEqual + { actual: NESCU.dropWhile (_ /= '.') (nes (SProxy :: SProxy "Test.purs")) + , expected: Just (nes (SProxy :: SProxy ".purs")) + } + + log "indexOf" + assertEqual + { actual: NESCU.indexOf (Pattern "") (nes (SProxy :: SProxy "abcd")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.indexOf (Pattern "bc") (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf (Pattern "cb") (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + + log "indexOf'" + assertEqual + { actual: NESCU.indexOf' (Pattern "") (-1) (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 0 (nes (SProxy :: SProxy "ab")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 1 (nes (SProxy :: SProxy "ab")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 2 (nes (SProxy :: SProxy "ab")) + , expected: Just 2 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "") 3 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "bc") 0 (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual 
+ { actual: NESCU.indexOf' (Pattern "bc") 1 (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.indexOf' (Pattern "bc") 2 (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + assertEqual + { actual: NESCU.indexOf' (Pattern "cb") 0 (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + + log "lastIndexOf" + assertEqual + { actual: NESCU.lastIndexOf (Pattern "") (nes (SProxy :: SProxy "abcd")) + , expected: Just 4 + } + assertEqual + { actual: NESCU.lastIndexOf (Pattern "bc") (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf (Pattern "cb") (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + + log "lastIndexOf'" + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") (-1) (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 0 (nes (SProxy :: SProxy "ab")) + , expected: Just 0 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 1 (nes (SProxy :: SProxy "ab")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 2 (nes (SProxy :: SProxy "ab")) + , expected: Just 2 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "") 3 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 0 (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 1 (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "bc") 2 (nes (SProxy :: SProxy "abcd")) + , expected: Just 1 + } + assertEqual + { actual: NESCU.lastIndexOf' (Pattern "cb") 0 (nes (SProxy :: SProxy "abcd")) + , expected: Nothing + } + + log "length" + assertEqual + { actual: NESCU.length (nes (SProxy :: SProxy "a")) + , expected: 1 + } + assertEqual + { actual: NESCU.length (nes (SProxy :: SProxy "ab")) + , expected: 2 + } + + log "take" + 
assertEqual + { actual: NESCU.take 0 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.take 1 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "a")) + } + assertEqual + { actual: NESCU.take 2 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.take 3 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.take (-1) (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + + log "takeRight" + assertEqual + { actual: NESCU.takeRight 0 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.takeRight 1 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "b")) + } + assertEqual + { actual: NESCU.takeRight 2 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.takeRight 3 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.takeRight (-1) (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + + log "drop" + assertEqual + { actual: NESCU.drop 0 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.drop 1 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "b")) + } + assertEqual + { actual: NESCU.drop 2 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.drop 3 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.drop (-1) (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + + log "dropRight" + assertEqual + { actual: NESCU.dropRight 0 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + assertEqual + { actual: NESCU.dropRight 1 (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy 
"a")) + } + assertEqual + { actual: NESCU.dropRight 2 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropRight 3 (nes (SProxy :: SProxy "ab")) + , expected: Nothing + } + assertEqual + { actual: NESCU.dropRight (-1) (nes (SProxy :: SProxy "ab")) + , expected: Just (nes (SProxy :: SProxy "ab")) + } + + log "countPrefix" + assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (SProxy :: SProxy "ab")) + , expected: 1 + } + assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (SProxy :: SProxy "aaab")) + , expected: 3 + } + assertEqual + { actual: NESCU.countPrefix (_ == 'a') (nes (SProxy :: SProxy "abaa")) + , expected: 1 + } + assertEqual + { actual: NESCU.countPrefix (_ == 'c') (nes (SProxy :: SProxy "abaa")) + , expected: 0 + } + + log "splitAt" + assertEqual + { actual: NESCU.splitAt 0 (nes (SProxy :: SProxy "a")) + , expected: { before: Nothing, after: Just (nes (SProxy :: SProxy "a")) } + } + assertEqual + { actual: NESCU.splitAt 1 (nes (SProxy :: SProxy "ab")) + , expected: { before: Just (nes (SProxy :: SProxy "a")), after: Just (nes (SProxy :: SProxy "b")) } + } + assertEqual + { actual: NESCU.splitAt 3 (nes (SProxy :: SProxy "aabcc")) + , expected: { before: Just (nes (SProxy :: SProxy "aab")), after: Just (nes (SProxy :: SProxy "cc")) } + } + assertEqual + { actual: NESCU.splitAt (-1) (nes (SProxy :: SProxy "abc")) + , expected: { before: Nothing, after: Just (nes (SProxy :: SProxy "abc")) } + } + +nea :: Array ~> NEA.NonEmptyArray +nea = unsafePartial fromJust <<< NEA.fromArray diff --git a/test/Test/Data/String/Unsafe.purs b/test/Test/Data/String/Unsafe.purs index c87055a..b6b9aca 100644 --- a/test/Test/Data/String/Unsafe.purs +++ b/test/Test/Data/String/Unsafe.purs @@ -1,23 +1,26 @@ module Test.Data.String.Unsafe (testStringUnsafe) where -import Prelude (Unit, (==), ($), discard) +import Prelude +import Data.String.Unsafe as SU import Effect (Effect) import Effect.Console (log) - -import 
Data.String.Unsafe - -import Test.Assert (assert) +import Test.Assert (assertEqual) testStringUnsafe :: Effect Unit testStringUnsafe = do - log "charCodeAt" - assert $ charCodeAt 0 "ab" == 97 - assert $ charCodeAt 1 "ab" == 98 - log "charAt" - assert $ charAt 0 "ab" == 'a' - assert $ charAt 1 "ab" == 'b' + assertEqual + { actual: SU.charAt 0 "ab" + , expected: 'a' + } + assertEqual + { actual: SU.charAt 1 "ab" + , expected: 'b' + } log "char" - assert $ char "a" == 'a' + assertEqual + { actual: SU.char "a" + , expected: 'a' + } diff --git a/test/Test/Main.purs b/test/Test/Main.purs index 92d6cd1..ad392d7 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -8,7 +8,9 @@ import Test.Data.Char (testChar) import Test.Data.String (testString) import Test.Data.String.CaseInsensitive (testCaseInsensitiveString) import Test.Data.String.CodePoints (testStringCodePoints) +import Test.Data.String.CodeUnits (testStringCodeUnits) import Test.Data.String.NonEmpty (testNonEmptyString) +import Test.Data.String.NonEmpty.CodeUnits (testNonEmptyStringCodeUnits) import Test.Data.String.Regex (testStringRegex) import Test.Data.String.Unsafe (testStringUnsafe) @@ -20,6 +22,8 @@ main = do testString log "\n--- Data.String.CodePoints ---\n" testStringCodePoints + log "\n--- Data.String.CodeUnits ---\n" + testStringCodeUnits log "\n--- Data.String.Unsafe ---\n" testStringUnsafe log "\n--- Data.String.Regex ---\n" @@ -28,3 +32,5 @@ main = do testCaseInsensitiveString log "\n--- Data.String.NonEmpty ---\n" testNonEmptyString + log "\n--- Data.String.NonEmpty.CodeUnits ---\n" + testNonEmptyStringCodeUnits From 765e1df50993e01c6526ab9bfd028df3074ae8c9 Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Mon, 21 May 2018 23:11:05 +0100 Subject: [PATCH 13/18] Remove Char module - toLower/toUpper should return String anyway --- src/Data/Char.js | 9 --------- src/Data/Char.purs | 11 ----------- test/Test/Data/Char.purs | 31 ------------------------------- test/Test/Main.purs | 3 --- 4 files 
changed, 54 deletions(-) delete mode 100644 src/Data/Char.js delete mode 100644 src/Data/Char.purs delete mode 100644 test/Test/Data/Char.purs diff --git a/src/Data/Char.js b/src/Data/Char.js deleted file mode 100644 index d396533..0000000 --- a/src/Data/Char.js +++ /dev/null @@ -1,9 +0,0 @@ -"use strict"; - -exports.toLower = function (c) { - return c.toLowerCase(); -}; - -exports.toUpper = function (c) { - return c.toUpperCase(); -}; diff --git a/src/Data/Char.purs b/src/Data/Char.purs deleted file mode 100644 index 169abb0..0000000 --- a/src/Data/Char.purs +++ /dev/null @@ -1,11 +0,0 @@ --- | A type and functions for single characters. -module Data.Char - ( toLower - , toUpper - ) where - --- | Converts a character to lowercase. -foreign import toLower :: Char -> Char - --- | Converts a character to uppercase. -foreign import toUpper :: Char -> Char diff --git a/test/Test/Data/Char.purs b/test/Test/Data/Char.purs deleted file mode 100644 index f6d80ea..0000000 --- a/test/Test/Data/Char.purs +++ /dev/null @@ -1,31 +0,0 @@ -module Test.Data.Char (testChar) where - -import Prelude - -import Data.Char as C -import Effect (Effect) -import Effect.Console (log) -import Test.Assert (assertEqual) - -testChar :: Effect Unit -testChar = do - - log "toLower" - assertEqual - { actual: C.toLower 'A' - , expected: 'a' - } - assertEqual - { actual: C.toLower 'a' - , expected: 'a' - } - - log "toUpper" - assertEqual - { actual: C.toUpper 'a' - , expected: 'A' - } - assertEqual - { actual: C.toUpper 'A' - , expected: 'A' - } diff --git a/test/Test/Main.purs b/test/Test/Main.purs index ad392d7..fb9f32e 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -4,7 +4,6 @@ import Prelude import Effect (Effect) import Effect.Console (log) -import Test.Data.Char (testChar) import Test.Data.String (testString) import Test.Data.String.CaseInsensitive (testCaseInsensitiveString) import Test.Data.String.CodePoints (testStringCodePoints) @@ -16,8 +15,6 @@ import 
Test.Data.String.Unsafe (testStringUnsafe) main :: Effect Unit main = do - log "\n--- Data.Char ---\n" - testChar log "\n--- Data.String ---\n" testString log "\n--- Data.String.CodePoints ---\n" From 0903ec7efe220bb627beb052301b0545914d3c69 Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 22 May 2018 16:10:05 +0200 Subject: [PATCH 14/18] resurrect toCharCode and fromCharCode --- src/Data/Char.purs | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/Data/Char.purs diff --git a/src/Data/Char.purs b/src/Data/Char.purs new file mode 100644 index 0000000..81c3fad --- /dev/null +++ b/src/Data/Char.purs @@ -0,0 +1,13 @@ +-- | A type and functions for single characters. +module Data.Char + ( toCharCode + , fromCharCode + ) where + +-- | Returns the numeric Unicode value of the character. +toCharCode :: Char -> Int +toCharCode = fromEnum + +-- | Constructs a character from the given Unicode numeric value. +fromCharCode :: Int -> Maybe Char +fromCharCode = toEnum From 6bbabd865a3af6d82ed7e955c232d43c6f41f104 Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 22 May 2018 16:27:38 +0200 Subject: [PATCH 15/18] :facepalm: --- src/Data/Char.purs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Data/Char.purs b/src/Data/Char.purs index 81c3fad..bb413b7 100644 --- a/src/Data/Char.purs +++ b/src/Data/Char.purs @@ -4,6 +4,9 @@ module Data.Char , fromCharCode ) where +import Data.Enum (fromEnum, toEnum) +import Data.Maybe (Maybe) + -- | Returns the numeric Unicode value of the character. 
toCharCode :: Char -> Int toCharCode = fromEnum From 1fbc4c0cf0fb816870a6841fa83c5cbdcddaaf22 Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 22 May 2018 17:59:44 +0200 Subject: [PATCH 16/18] reexport Data.String.CodePoints from Data.String --- src/Data/String.purs | 11 ++++++----- src/Data/String/CodePoints.purs | 9 +++++---- src/Data/String/CodeUnits.purs | 7 ++----- src/Data/String/Regex.purs | 3 ++- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Data/String.purs b/src/Data/String.purs index a382e6e..742f265 100644 --- a/src/Data/String.purs +++ b/src/Data/String.purs @@ -1,9 +1,10 @@ module Data.String - ( module Data.String.Pattern - , module Data.String.Common - , module Data.String.CodeUnits + ( module Data.String.Common + , module Data.String.CodePoints + , module Data.String.Pattern ) where -import Data.String.Pattern (Pattern(..), Replacement(..)) +import Data.String.CodePoints + import Data.String.Common (joinWith, localeCompare, null, replace, replaceAll, split, toLower, toUpper, trim) -import Data.String.CodeUnits (contains, stripPrefix, stripSuffix) +import Data.String.Pattern (Pattern(..), Replacement(..)) diff --git a/src/Data/String/CodePoints.purs b/src/Data/String/CodePoints.purs index 473731c..c9b5b15 100644 --- a/src/Data/String/CodePoints.purs +++ b/src/Data/String/CodePoints.purs @@ -4,7 +4,7 @@ -- | strings, these functions should be preferred over the ones in -- | `Data.String.CodeUnits`. 
module Data.String.CodePoints - ( module Data.String + ( module Exports , CodePoint , codePointFromChar , singleton @@ -34,9 +34,10 @@ import Data.Array as Array import Data.Enum (class BoundedEnum, class Enum, Cardinality(..), defaultPred, defaultSucc, fromEnum, toEnum, toEnumWithDefaults) import Data.Int (hexadecimal, toStringAs) import Data.Maybe (Maybe(..)) -import Data.String as String +import Data.String.CodeUnits (contains, stripPrefix, stripSuffix) as Exports import Data.String.CodeUnits as CU -import Data.String (Pattern(..), Replacement(..), contains, joinWith, localeCompare, null, replace, replaceAll, split, stripPrefix, stripSuffix, toLower, toUpper, trim) +import Data.String.Common (toUpper) +import Data.String.Pattern (Pattern) import Data.String.Unsafe as Unsafe import Data.Tuple (Tuple(..)) import Data.Unfoldable (unfoldr) @@ -49,7 +50,7 @@ derive instance eqCodePoint :: Eq CodePoint derive instance ordCodePoint :: Ord CodePoint instance showCodePoint :: Show CodePoint where - show (CodePoint i) = "(CodePoint 0x" <> String.toUpper (toStringAs hexadecimal i) <> ")" + show (CodePoint i) = "(CodePoint 0x" <> toUpper (toStringAs hexadecimal i) <> ")" instance boundedCodePoint :: Bounded CodePoint where bottom = CodePoint 0 diff --git a/src/Data/String/CodeUnits.purs b/src/Data/String/CodeUnits.purs index b67a639..1b127ef 100644 --- a/src/Data/String/CodeUnits.purs +++ b/src/Data/String/CodeUnits.purs @@ -1,7 +1,5 @@ module Data.String.CodeUnits - ( module Data.String.Pattern - , module Data.String.Common - , stripPrefix + ( stripPrefix , stripSuffix , contains , singleton @@ -29,8 +27,7 @@ module Data.String.CodeUnits import Prelude import Data.Maybe (Maybe(..), isJust) -import Data.String.Common (joinWith, localeCompare, null, replace, replaceAll, split, toLower, toUpper, trim) -import Data.String.Pattern (Pattern(..), Replacement(..)) +import Data.String.Pattern (Pattern(..)) import Data.String.Unsafe as U 
------------------------------------------------------------------------------- diff --git a/src/Data/String/Regex.purs b/src/Data/String/Regex.purs index 79a1a9f..d42145a 100644 --- a/src/Data/String/Regex.purs +++ b/src/Data/String/Regex.purs @@ -21,7 +21,8 @@ import Prelude import Data.Array.NonEmpty (NonEmptyArray) import Data.Either (Either(..)) import Data.Maybe (Maybe(..)) -import Data.String (Pattern(..), contains) +import Data.String (contains) +import Data.String.Pattern (Pattern(..)) import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) -- | Wraps Javascript `RegExp` objects. From cdfef73597c9f0c4ed8e157d511d40e6e120ca12 Mon Sep 17 00:00:00 2001 From: Christoph Date: Tue, 22 May 2018 23:18:44 +0200 Subject: [PATCH 17/18] mirror .String structure from .NonEmpty --- src/Data/String/NonEmpty.purs | 242 +---------------------- src/Data/String/NonEmpty/CodePoints.purs | 6 +- src/Data/String/NonEmpty/CodeUnits.purs | 6 +- src/Data/String/NonEmpty/Internal.purs | 219 ++++++++++++++++++++ 4 files changed, 230 insertions(+), 243 deletions(-) create mode 100644 src/Data/String/NonEmpty/Internal.purs diff --git a/src/Data/String/NonEmpty.purs b/src/Data/String/NonEmpty.purs index 02988a1..6b6210c 100644 --- a/src/Data/String/NonEmpty.purs +++ b/src/Data/String/NonEmpty.purs @@ -1,241 +1,9 @@ module Data.String.NonEmpty - ( NonEmptyString - , class MakeNonEmpty, nes - , NonEmptyReplacement(..) 
- , fromString - , unsafeFromString - , toString - , appendString - , prependString - , contains - , localeCompare - , replace - , replaceAll - , stripPrefix - , stripSuffix - , toLower - , toUpper - , trim - , joinWith - , join1With - , joinWith1 - , module Data.String.Pattern + ( module Data.String.Pattern + , module Data.String.NonEmpty.Internal + , module Data.String.NonEmpty.CodePoints ) where -import Prelude - -import Data.Foldable (class Foldable) -import Data.Foldable as F -import Data.Maybe (Maybe(..), fromJust) -import Data.Semigroup.Foldable (class Foldable1) -import Data.String as String +import Data.String.NonEmpty.Internal (NonEmptyString, class MakeNonEmpty, NonEmptyReplacement(..), appendString, contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, toLower, toString, toUpper, trim, unsafeFromString) import Data.String.Pattern (Pattern(..)) -import Data.Symbol (class IsSymbol, SProxy, reflectSymbol) -import Prim.TypeError as TE -import Unsafe.Coerce (unsafeCoerce) - --- | A string that is known not to be empty. -newtype NonEmptyString = NonEmptyString String - -derive newtype instance eqNonEmptyString ∷ Eq NonEmptyString -derive newtype instance ordNonEmptyString ∷ Ord NonEmptyString -derive newtype instance semigroupNonEmptyString ∷ Semigroup NonEmptyString - -instance showNonEmptyString :: Show NonEmptyString where - show (NonEmptyString s) = "(NonEmptyString.unsafeFromString " <> show s <> ")" - --- | A helper class for defining non-empty string values at compile time. 
--- | --- | ``` purescript --- | something :: NonEmptyString --- | something = nes (SProxy :: SProxy "something") --- | ``` -class MakeNonEmpty (s :: Symbol) where - nes :: SProxy s -> NonEmptyString - -instance makeNonEmptyBad :: TE.Fail (TE.Text "Cannot create an NonEmptyString from an empty Symbol") => MakeNonEmpty "" where - nes _ = NonEmptyString "" - -else instance nonEmptyNonEmpty :: IsSymbol s => MakeNonEmpty s where - nes p = NonEmptyString (reflectSymbol p) - --- | A newtype used in cases to specify a non-empty replacement for a pattern. -newtype NonEmptyReplacement = NonEmptyReplacement NonEmptyString - -derive newtype instance eqNonEmptyReplacement :: Eq NonEmptyReplacement -derive newtype instance ordNonEmptyReplacement :: Ord NonEmptyReplacement -derive newtype instance semigroupNonEmptyReplacement ∷ Semigroup NonEmptyReplacement - -instance showNonEmptyReplacement :: Show NonEmptyReplacement where - show (NonEmptyReplacement s) = "(NonEmptyReplacement " <> show s <> ")" - --- | Creates a `NonEmptyString` from a `String`, returning `Nothing` if the --- | input is empty. --- | --- | ```purescript --- | fromString "" = Nothing --- | fromString "hello" = Just (NES.unsafeFromString "hello") --- | ``` -fromString :: String -> Maybe NonEmptyString -fromString = case _ of - "" -> Nothing - s -> Just (NonEmptyString s) - --- | A partial version of `fromString`. -unsafeFromString :: Partial => String -> NonEmptyString -unsafeFromString = fromJust <<< fromString - --- | Converts a `NonEmptyString` back into a standard `String`. -toString :: NonEmptyString -> String -toString (NonEmptyString s) = s - --- | Appends a string to this non-empty string. Since one of the strings is --- | non-empty we know the result will be too. 
--- | --- | ```purescript --- | appendString (NonEmptyString "Hello") " world" == NonEmptyString "Hello world" --- | appendString (NonEmptyString "Hello") "" == NonEmptyString "Hello" --- | ``` -appendString :: NonEmptyString -> String -> NonEmptyString -appendString (NonEmptyString s1) s2 = NonEmptyString (s1 <> s2) - --- | Prepends a string to this non-empty string. Since one of the strings is --- | non-empty we know the result will be too. --- | --- | ```purescript --- | prependString "be" (NonEmptyString "fore") == NonEmptyString "before" --- | prependString "" (NonEmptyString "fore") == NonEmptyString "fore" --- | ``` -prependString :: String -> NonEmptyString -> NonEmptyString -prependString s1 (NonEmptyString s2) = NonEmptyString (s1 <> s2) - --- | If the string starts with the given prefix, return the portion of the --- | string left after removing it. If the prefix does not match or there is no --- | remainder, the result will be `Nothing`. --- | --- | ```purescript --- | stripPrefix (Pattern "http:") (NonEmptyString "http://purescript.org") == Just (NonEmptyString "//purescript.org") --- | stripPrefix (Pattern "http:") (NonEmptyString "https://purescript.org") == Nothing --- | stripPrefix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing --- | ``` -stripPrefix :: Pattern -> NonEmptyString -> Maybe NonEmptyString -stripPrefix pat = fromString <=< liftS (String.stripPrefix pat) - --- | If the string ends with the given suffix, return the portion of the --- | string left after removing it. If the suffix does not match or there is no --- | remainder, the result will be `Nothing`. 
--- | --- | ```purescript --- | stripSuffix (Pattern ".exe") (NonEmptyString "purs.exe") == Just (NonEmptyString "purs") --- | stripSuffix (Pattern ".exe") (NonEmptyString "purs") == Nothing --- | stripSuffix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing --- | ``` -stripSuffix :: Pattern -> NonEmptyString -> Maybe NonEmptyString -stripSuffix pat = fromString <=< liftS (String.stripSuffix pat) - --- | Checks whether the pattern appears in the given string. --- | --- | ```purescript --- | contains (Pattern "needle") (NonEmptyString "haystack with needle") == true --- | contains (Pattern "needle") (NonEmptyString "haystack") == false --- | ``` -contains :: Pattern -> NonEmptyString -> Boolean -contains = liftS <<< String.contains - --- | Compare two strings in a locale-aware fashion. This is in contrast to --- | the `Ord` instance on `String` which treats strings as arrays of code --- | units: --- | --- | ```purescript --- | NonEmptyString "ä" `localeCompare` NonEmptyString "b" == LT --- | NonEmptyString "ä" `compare` NonEmptyString "b" == GT --- | ``` -localeCompare :: NonEmptyString -> NonEmptyString -> Ordering -localeCompare (NonEmptyString a) (NonEmptyString b) = String.localeCompare a b - --- | Replaces the first occurence of the pattern with the replacement string. --- | --- | ```purescript --- | replace (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b <= c" --- | ``` -replace :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString -replace pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = - NonEmptyString (String.replace pat (String.Replacement rep) s) - --- | Replaces all occurences of the pattern with the replacement string. 
--- | --- | ```purescript --- | replaceAll (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b ≤ c" --- | ``` -replaceAll :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString -replaceAll pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = - NonEmptyString (String.replaceAll pat (String.Replacement rep) s) - --- | Returns the argument converted to lowercase. --- | --- | ```purescript --- | toLower (NonEmptyString "hElLo") == NonEmptyString "hello" --- | ``` -toLower :: NonEmptyString -> NonEmptyString -toLower (NonEmptyString s) = NonEmptyString (String.toLower s) - --- | Returns the argument converted to uppercase. --- | --- | ```purescript --- | toUpper (NonEmptyString "Hello") == NonEmptyString "HELLO" --- | ``` -toUpper :: NonEmptyString -> NonEmptyString -toUpper (NonEmptyString s) = NonEmptyString (String.toUpper s) - --- | Removes whitespace from the beginning and end of a string, including --- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) --- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). --- | If the string is entirely made up of whitespace the result will be Nothing. --- | --- | ```purescript --- | trim (NonEmptyString " Hello \n World\n\t ") == Just (NonEmptyString "Hello \n World") --- | trim (NonEmptyString " \n") == Nothing --- | ``` -trim :: NonEmptyString -> Maybe NonEmptyString -trim (NonEmptyString s) = fromString (String.trim s) - --- | Joins the strings in a container together as a new string, inserting the --- | first argument as separator between them. The result is not guaranteed to --- | be non-empty. --- | --- | ```purescript --- | joinWith ", " [NonEmptyString "apple", NonEmptyString "banana"] == "apple, banana" --- | joinWith ", " [] == "" --- | ``` -joinWith :: forall f. 
Foldable f => String -> f NonEmptyString -> String -joinWith splice = F.intercalate splice <<< coe - where - coe :: f NonEmptyString -> f String - coe = unsafeCoerce - --- | Joins non-empty strings in a non-empty container together as a new --- | non-empty string, inserting a possibly empty string as separator between --- | them. The result is guaranteed to be non-empty. --- | --- | ```purescript --- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` --- | join1With ", " [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "apple, banana" --- | join1With "" [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "applebanana" --- | ``` -join1With :: forall f. Foldable1 f => String -> f NonEmptyString -> NonEmptyString -join1With splice = NonEmptyString <<< joinWith splice - --- | Joins possibly empty strings in a non-empty container together as a new --- | non-empty string, inserting a non-empty string as a separator between them. --- | The result is guaranteed to be non-empty. --- | --- | ```purescript --- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` --- | joinWith1 (NonEmptyString ", ") ["apple", "banana"] == NonEmptyString "apple, banana" --- | joinWith1 (NonEmptyString "/") ["a", "b", "", "c", ""] == NonEmptyString "a/b//c/" --- | ``` -joinWith1 :: forall f. Foldable1 f => NonEmptyString -> f String -> NonEmptyString -joinWith1 (NonEmptyString splice) = NonEmptyString <<< F.intercalate splice - -liftS :: forall r. 
(String -> r) -> NonEmptyString -> r -liftS f (NonEmptyString s) = f s +import Data.String.NonEmpty.CodePoints diff --git a/src/Data/String/NonEmpty/CodePoints.purs b/src/Data/String/NonEmpty/CodePoints.purs index 5ec1254..5357851 100644 --- a/src/Data/String/NonEmpty/CodePoints.purs +++ b/src/Data/String/NonEmpty/CodePoints.purs @@ -1,6 +1,5 @@ module Data.String.NonEmpty.CodePoints - ( module Data.String.NonEmpty - , fromCodePointArray + ( fromCodePointArray , fromNonEmptyCodePointArray , singleton , cons @@ -34,7 +33,8 @@ import Data.Semigroup.Foldable (class Foldable1) import Data.Semigroup.Foldable as F1 import Data.String.CodePoints (CodePoint) import Data.String.CodePoints as CP -import Data.String.NonEmpty (class MakeNonEmpty, NonEmptyReplacement(..), NonEmptyString, Pattern(..), appendString, contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, toLower, toString, toUpper, trim, unsafeFromString) +import Data.String.NonEmpty.Internal (NonEmptyString, fromString) +import Data.String.Pattern (Pattern) import Partial.Unsafe (unsafePartial) import Unsafe.Coerce (unsafeCoerce) diff --git a/src/Data/String/NonEmpty/CodeUnits.purs b/src/Data/String/NonEmpty/CodeUnits.purs index 0a826f0..2aa0a6c 100644 --- a/src/Data/String/NonEmpty/CodeUnits.purs +++ b/src/Data/String/NonEmpty/CodeUnits.purs @@ -1,6 +1,5 @@ module Data.String.NonEmpty.CodeUnits - ( module Data.String.NonEmpty - , fromCharArray + ( fromCharArray , fromNonEmptyCharArray , singleton , cons @@ -34,8 +33,9 @@ import Data.Maybe (Maybe(..), fromJust) import Data.Semigroup.Foldable (class Foldable1) import Data.Semigroup.Foldable as F1 import Data.String.CodeUnits as CU +import Data.String.NonEmpty.Internal (NonEmptyString, fromString) +import Data.String.Pattern (Pattern) import Data.String.Unsafe as U -import Data.String.NonEmpty (class MakeNonEmpty, NonEmptyReplacement(..), NonEmptyString, Pattern(..), appendString, 
contains, fromString, join1With, joinWith, joinWith1, localeCompare, nes, prependString, replace, replaceAll, stripPrefix, stripSuffix, toLower, toString, toUpper, trim, unsafeFromString) import Partial.Unsafe (unsafePartial) import Unsafe.Coerce (unsafeCoerce) diff --git a/src/Data/String/NonEmpty/Internal.purs b/src/Data/String/NonEmpty/Internal.purs new file mode 100644 index 0000000..bfd2984 --- /dev/null +++ b/src/Data/String/NonEmpty/Internal.purs @@ -0,0 +1,219 @@ +module Data.String.NonEmpty.Internal where + +import Prelude + +import Data.Foldable (class Foldable) +import Data.Foldable as F +import Data.Maybe (Maybe(..), fromJust) +import Data.Semigroup.Foldable (class Foldable1) +import Data.String as String +import Data.String.Pattern (Pattern) +import Data.Symbol (class IsSymbol, SProxy, reflectSymbol) +import Prim.TypeError as TE +import Unsafe.Coerce (unsafeCoerce) + +-- | A string that is known not to be empty. +newtype NonEmptyString = NonEmptyString String + +derive newtype instance eqNonEmptyString ∷ Eq NonEmptyString +derive newtype instance ordNonEmptyString ∷ Ord NonEmptyString +derive newtype instance semigroupNonEmptyString ∷ Semigroup NonEmptyString + +instance showNonEmptyString :: Show NonEmptyString where + show (NonEmptyString s) = "(NonEmptyString.unsafeFromString " <> show s <> ")" + +-- | A helper class for defining non-empty string values at compile time. +-- | +-- | ``` purescript +-- | something :: NonEmptyString +-- | something = nes (SProxy :: SProxy "something") +-- | ``` +class MakeNonEmpty (s :: Symbol) where + nes :: SProxy s -> NonEmptyString + +instance makeNonEmptyBad :: TE.Fail (TE.Text "Cannot create an NonEmptyString from an empty Symbol") => MakeNonEmpty "" where + nes _ = NonEmptyString "" + +else instance nonEmptyNonEmpty :: IsSymbol s => MakeNonEmpty s where + nes p = NonEmptyString (reflectSymbol p) + +-- | A newtype used in cases to specify a non-empty replacement for a pattern. 
+newtype NonEmptyReplacement = NonEmptyReplacement NonEmptyString + +derive newtype instance eqNonEmptyReplacement :: Eq NonEmptyReplacement +derive newtype instance ordNonEmptyReplacement :: Ord NonEmptyReplacement +derive newtype instance semigroupNonEmptyReplacement ∷ Semigroup NonEmptyReplacement + +instance showNonEmptyReplacement :: Show NonEmptyReplacement where + show (NonEmptyReplacement s) = "(NonEmptyReplacement " <> show s <> ")" + +-- | Creates a `NonEmptyString` from a `String`, returning `Nothing` if the +-- | input is empty. +-- | +-- | ```purescript +-- | fromString "" = Nothing +-- | fromString "hello" = Just (NES.unsafeFromString "hello") +-- | ``` +fromString :: String -> Maybe NonEmptyString +fromString = case _ of + "" -> Nothing + s -> Just (NonEmptyString s) + +-- | A partial version of `fromString`. +unsafeFromString :: Partial => String -> NonEmptyString +unsafeFromString = fromJust <<< fromString + +-- | Converts a `NonEmptyString` back into a standard `String`. +toString :: NonEmptyString -> String +toString (NonEmptyString s) = s + +-- | Appends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. +-- | +-- | ```purescript +-- | appendString (NonEmptyString "Hello") " world" == NonEmptyString "Hello world" +-- | appendString (NonEmptyString "Hello") "" == NonEmptyString "Hello" +-- | ``` +appendString :: NonEmptyString -> String -> NonEmptyString +appendString (NonEmptyString s1) s2 = NonEmptyString (s1 <> s2) + +-- | Prepends a string to this non-empty string. Since one of the strings is +-- | non-empty we know the result will be too. 
+-- | +-- | ```purescript +-- | prependString "be" (NonEmptyString "fore") == NonEmptyString "before" +-- | prependString "" (NonEmptyString "fore") == NonEmptyString "fore" +-- | ``` +prependString :: String -> NonEmptyString -> NonEmptyString +prependString s1 (NonEmptyString s2) = NonEmptyString (s1 <> s2) + +-- | If the string starts with the given prefix, return the portion of the +-- | string left after removing it. If the prefix does not match or there is no +-- | remainder, the result will be `Nothing`. +-- | +-- | ```purescript +-- | stripPrefix (Pattern "http:") (NonEmptyString "http://purescript.org") == Just (NonEmptyString "//purescript.org") +-- | stripPrefix (Pattern "http:") (NonEmptyString "https://purescript.org") == Nothing +-- | stripPrefix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripPrefix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripPrefix pat = fromString <=< liftS (String.stripPrefix pat) + +-- | If the string ends with the given suffix, return the portion of the +-- | string left after removing it. If the suffix does not match or there is no +-- | remainder, the result will be `Nothing`. +-- | +-- | ```purescript +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs.exe") == Just (NonEmptyString "purs") +-- | stripSuffix (Pattern ".exe") (NonEmptyString "purs") == Nothing +-- | stripSuffix (Pattern "Hello!") (NonEmptyString "Hello!") == Nothing +-- | ``` +stripSuffix :: Pattern -> NonEmptyString -> Maybe NonEmptyString +stripSuffix pat = fromString <=< liftS (String.stripSuffix pat) + +-- | Checks whether the pattern appears in the given string. +-- | +-- | ```purescript +-- | contains (Pattern "needle") (NonEmptyString "haystack with needle") == true +-- | contains (Pattern "needle") (NonEmptyString "haystack") == false +-- | ``` +contains :: Pattern -> NonEmptyString -> Boolean +contains = liftS <<< String.contains + +-- | Compare two strings in a locale-aware fashion. 
This is in contrast to +-- | the `Ord` instance on `String` which treats strings as arrays of code +-- | units: +-- | +-- | ```purescript +-- | NonEmptyString "ä" `localeCompare` NonEmptyString "b" == LT +-- | NonEmptyString "ä" `compare` NonEmptyString "b" == GT +-- | ``` +localeCompare :: NonEmptyString -> NonEmptyString -> Ordering +localeCompare (NonEmptyString a) (NonEmptyString b) = String.localeCompare a b + +-- | Replaces the first occurrence of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replace (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b <= c" +-- | ``` +replace :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replace pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replace pat (String.Replacement rep) s) + +-- | Replaces all occurrences of the pattern with the replacement string. +-- | +-- | ```purescript +-- | replaceAll (Pattern "<=") (NonEmptyReplacement "≤") (NonEmptyString "a <= b <= c") == NonEmptyString "a ≤ b ≤ c" +-- | ``` +replaceAll :: Pattern -> NonEmptyReplacement -> NonEmptyString -> NonEmptyString +replaceAll pat (NonEmptyReplacement (NonEmptyString rep)) (NonEmptyString s) = + NonEmptyString (String.replaceAll pat (String.Replacement rep) s) + +-- | Returns the argument converted to lowercase. +-- | +-- | ```purescript +-- | toLower (NonEmptyString "hElLo") == NonEmptyString "hello" +-- | ``` +toLower :: NonEmptyString -> NonEmptyString +toLower (NonEmptyString s) = NonEmptyString (String.toLower s) + +-- | Returns the argument converted to uppercase.
+-- | +-- | ```purescript +-- | toUpper (NonEmptyString "Hello") == NonEmptyString "HELLO" +-- | ``` +toUpper :: NonEmptyString -> NonEmptyString +toUpper (NonEmptyString s) = NonEmptyString (String.toUpper s) + +-- | Removes whitespace from the beginning and end of a string, including +-- | [whitespace characters](http://www.ecma-international.org/ecma-262/5.1/#sec-7.2) +-- | and [line terminators](http://www.ecma-international.org/ecma-262/5.1/#sec-7.3). +-- | If the string is entirely made up of whitespace the result will be Nothing. +-- | +-- | ```purescript +-- | trim (NonEmptyString " Hello \n World\n\t ") == Just (NonEmptyString "Hello \n World") +-- | trim (NonEmptyString " \n") == Nothing +-- | ``` +trim :: NonEmptyString -> Maybe NonEmptyString +trim (NonEmptyString s) = fromString (String.trim s) + +-- | Joins the strings in a container together as a new string, inserting the +-- | first argument as separator between them. The result is not guaranteed to +-- | be non-empty. +-- | +-- | ```purescript +-- | joinWith ", " [NonEmptyString "apple", NonEmptyString "banana"] == "apple, banana" +-- | joinWith ", " [] == "" +-- | ``` +joinWith :: forall f. Foldable f => String -> f NonEmptyString -> String +joinWith splice = F.intercalate splice <<< coe + where + coe :: f NonEmptyString -> f String + coe = unsafeCoerce + +-- | Joins non-empty strings in a non-empty container together as a new +-- | non-empty string, inserting a possibly empty string as separator between +-- | them. The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | join1With ", " [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "apple, banana" +-- | join1With "" [NonEmptyString "apple", NonEmptyString "banana"] == NonEmptyString "applebanana" +-- | ``` +join1With :: forall f. 
Foldable1 f => String -> f NonEmptyString -> NonEmptyString +join1With splice = NonEmptyString <<< joinWith splice + +-- | Joins possibly empty strings in a non-empty container together as a new +-- | non-empty string, inserting a non-empty string as a separator between them. +-- | The result is guaranteed to be non-empty. +-- | +-- | ```purescript +-- | -- array syntax is used for demonstration here, it would need to be a real `Foldable1` +-- | joinWith1 (NonEmptyString ", ") ["apple", "banana"] == NonEmptyString "apple, banana" +-- | joinWith1 (NonEmptyString "/") ["a", "b", "", "c", ""] == NonEmptyString "a/b//c/" +-- | ``` +joinWith1 :: forall f. Foldable1 f => NonEmptyString -> f String -> NonEmptyString +joinWith1 (NonEmptyString splice) = NonEmptyString <<< F.intercalate splice + +liftS :: forall r. (String -> r) -> NonEmptyString -> r +liftS f (NonEmptyString s) = f s From b1d444667ee494019082cf25f2584e8e2144f962 Mon Sep 17 00:00:00 2001 From: Gary Burgess Date: Wed, 23 May 2018 23:53:31 +0100 Subject: [PATCH 18/18] Update dependencies, license, fix esling warning --- LICENSE | 38 ++++++++++++++++++++++---------------- bower.json | 33 ++++++++++++++++++++------------- package.json | 8 ++++---- src/Data/String/Common.js | 2 +- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/LICENSE b/LICENSE index 58b0299..311379c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,20 +1,26 @@ -The MIT License (MIT) +Copyright 2018 PureScript -Copyright (c) 2014 PureScript +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to 
whom the Software is furnished to do so, -subject to the following conditions: +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/bower.json b/bower.json index db0c9b5..ec7fb7f 100644 --- a/bower.json +++ b/bower.json @@ -1,8 +1,7 @@ { "name": "purescript-strings", "homepage": "https://github.com/purescript/purescript-strings", - "description": "String and char utility functions, regular expressions.", - "license": "MIT", + "license": "BSD-3-Clause", "repository": { "type": "git", "url": "git://github.com/purescript/purescript-strings.git" @@ -17,18 +16,26 @@ "package.json" ], "dependencies": { - "purescript-arrays": "#compiler/0.12", - "purescript-either": "#compiler/0.12", - "purescript-enums": "#compiler/0.12", - "purescript-gen": "#compiler/0.12", - "purescript-integers": "#compiler/0.12", - "purescript-maybe": "#compiler/0.12", - "purescript-partial": "#compiler/0.12", - "purescript-unfoldable": "#compiler/0.12" + "purescript-arrays": "^5.0.0", + "purescript-control": "^4.0.0", + "purescript-either": "^4.0.0", + "purescript-enums": "^4.0.0", + "purescript-foldable-traversable": "^4.0.0", + "purescript-gen": "^2.0.0", + "purescript-integers": "^4.0.0", + "purescript-maybe": "^4.0.0", + "purescript-newtype": "^3.0.0", + "purescript-nonempty": "^5.0.0", + "purescript-partial": "^2.0.0", + "purescript-prelude": "^4.0.0", + "purescript-tailrec": "^4.0.0", + "purescript-tuples": "^5.0.0", + "purescript-unfoldable": "^4.0.0", + "purescript-unsafe-coerce": "^4.0.0" }, "devDependencies": { - "purescript-assert": "#compiler/0.12", - "purescript-console": "#compiler/0.12", - "purescript-minibench": "#compiler/0.12" + "purescript-assert": "^4.0.0", + "purescript-console": "^4.0.0", + "purescript-minibench": "^2.0.0" } } diff --git a/package.json b/package.json index 64f4ac9..d44d01e 100644 --- a/package.json +++ b/package.json @@ -9,9 +9,9 @@ "bench": "npm run bench:build && npm run bench:run" }, "devDependencies": { - "eslint": "^3.17.1", - "pulp": "^10.0.4", - "purescript-psa": "^0.5.0-rc.1", - "rimraf": "^2.6.1" + "eslint": "^4.19.1", + "pulp": "^12.2.0", + "purescript-psa": 
"^0.6.0", + "rimraf": "^2.6.2" } } diff --git a/src/Data/String/Common.js b/src/Data/String/Common.js index 3237741..111c02e 100644 --- a/src/Data/String/Common.js +++ b/src/Data/String/Common.js @@ -24,7 +24,7 @@ exports.replace = function (s1) { exports.replaceAll = function (s1) { return function (s2) { return function (s3) { - return s3.replace(new RegExp(s1.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&"), "g"), s2); + return s3.replace(new RegExp(s1.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&"), "g"), s2); // eslint-disable-line no-useless-escape }; }; };