Skip to content

UTF-8 round trip bug #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Continuous-integration workflow: installs npm dependencies, builds and runs the tests.
name: CI

# Trigger on every push.
on: push

jobs:
build:
runs-on: ubuntu-latest
steps:
# Fetch the repository contents into the runner workspace.
- uses: actions/checkout@v2

# NOTE(review): presumably installs the PureScript toolchain; this action is
# pinned to the moving `main` ref rather than a tagged release - confirm that is intended.
- uses: purescript-contrib/setup-purescript@main

- name: Cache PureScript dependencies
uses: actions/cache@v2
# This cache uses the .dhall files to know when it should reinstall
# and rebuild packages. It caches both the installed packages from
# the `.spago` directory and compilation artifacts from the `output`
# directory. When restored the compiler will rebuild any files that
# have changed. If you do not want to cache compiled output, remove
# the `output` path.
with:
key: ${{ runner.os }}-spago-${{ hashFiles('**/*.dhall') }}
path: |
.spago
node_modules
output

# Install npm dependencies, then run the project's `build` and `test` npm scripts.
- run: npm i
- run: npm run build
- run: npm run test
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/.*
!/.gitignore
!/.github
!/.eslintrc.json
!/.travis.yml
/bower_components/
Expand Down
39 changes: 22 additions & 17 deletions spago.dhall
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
{ name = "bytestrings"
, dependencies =
[ "arrays"
, "console"
, "effect"
, "exceptions"
, "foldable-traversable"
, "integers"
, "leibniz"
, "maybe"
, "newtype"
, "node-buffer"
, "partial"
, "prelude"
, "quickcheck"
, "quickcheck-laws"
, "quotient"
, "unsafe-coerce"
]
[ "arrays"
, "console"
, "effect"
, "enums"
, "exceptions"
, "foldable-traversable"
, "gen"
, "integers"
, "leibniz"
, "maybe"
, "newtype"
, "node-buffer"
, "nonempty"
, "partial"
, "prelude"
, "quickcheck"
, "quickcheck-laws"
, "quotient"
, "strings"
, "tuples"
, "unsafe-coerce"
]
, packages = ./packages.dhall
, sources = [ "src/**/*.purs", "test/**/*.purs" ]
}
62 changes: 53 additions & 9 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,31 @@ module Test.Main
( main
) where

import Effect (Effect)
import Effect.Console (log)
import Data.ByteString
import Data.ByteString (ByteString, Encoding(..), Octet, cons, empty, foldl, foldr, fromString, fromUTF8, head, init, isEmpty, last, length, map, pack, reverse, singleton, snoc, tail, toUTF8, uncons, unpack, unsnoc)
import Prelude (Unit, bind, bottom, discard, flip, identity, pure, top, (#), ($), (&&), (+), (-), (/), (<), (<$>), (<*>), (<<<), (<>), (==), (>), (||))

import Control.Monad.Gen (frequency)
import Data.Array (foldMap)
import Data.Enum (toEnumWithDefaults)
import Data.Foldable as Foldable
import Data.Int (toNumber)
import Data.Maybe (Maybe(..))
import Prelude hiding (map)
import Data.NonEmpty (NonEmpty(..))
import Data.String (CodePoint, fromCodePointArray)
import Data.Tuple (Tuple(..))
import Effect (Effect)
import Effect.Console (log)
import Prelude as Prelude
import Test.QuickCheck ((===), quickCheck)
import Test.QuickCheck (class Arbitrary, arbitrary, quickCheck, (===))
import Test.QuickCheck.Gen (arrayOf, chooseInt, suchThat)
import Test.QuickCheck.Laws.Data.Eq (checkEq)
import Test.QuickCheck.Laws.Data.Monoid (checkMonoid)
import Test.QuickCheck.Laws.Data.Ord (checkOrd)
import Test.QuickCheck.Laws.Data.Semigroup (checkSemigroup)
import Type.Proxy (Proxy(..))
import Type.Quotient (mkQuotient, runQuotient)


main :: Effect Unit
main = do
log "laws"
Expand Down Expand Up @@ -87,10 +97,44 @@ main = do
quickCheck $ fromString "ABCD" Hex === Just (withOctets pack [0xAB, 0xCD])
-- this line is commented out because for invalid input the result is currently `pack []`; this should be fixed later
-- quickCheck $ fromString "LOL" Hex === Nothing

where
subL a b = a - runQuotient b
subR a b = runQuotient a - b
log "utf8"
quickCheck $ \(BMPString s) -> fromUTF8 (toUTF8 s) === s

where
subL a b = a - runQuotient b
subR a b = runQuotient a - b

-- | Wrapper around `String` that carries the custom `Arbitrary` instance used
-- | to generate inputs for the UTF-8 round-trip property.
-- | NOTE(review): despite the name, the generator can also emit high/low
-- | surrogate pairs, which presumably denote code points outside the Basic
-- | Multilingual Plane - confirm the name is intended.
newtype BMPString = BMPString String

-- | One generated unit of test input: `Normal` holds a single code point,
-- | `Surrogates` holds two code points that are always emitted together
-- | (its generator fills them with a high-surrogate value followed by a
-- | low-surrogate value).
data UnicodeChar = Normal CodePoint | Surrogates CodePoint CodePoint

-- | Builds a test string from an arbitrary array of `UnicodeChar`s by
-- | flattening every element into its code points and assembling the result
-- | with `fromCodePointArray`.
instance Arbitrary BMPString where
  arbitrary =
    BMPString <<< fromCodePointArray <<< foldMap codePoints
      <$> arrayOf (arbitrary @UnicodeChar)
    where
    -- A `Normal` contributes one code point, a `Surrogates` contributes its
    -- two code points in order.
    codePoints :: UnicodeChar -> Array CodePoint
    codePoints (Normal cp) = [ cp ]
    codePoints (Surrogates hi lo) = [ hi, lo ]

-- | Generates either a single non-surrogate code point in `0 .. 0xFFFF`
-- | (`Normal`) or a high surrogate followed by a low surrogate
-- | (`Surrogates`). The pair branch is weighted by the share of surrogate
-- | values among the 65536 values of that range.
instance Arbitrary UnicodeChar where
  arbitrary =
    frequency $ NonEmpty
      (Tuple (1.0 - surrogateWeight) normalGen)
      [ Tuple surrogateWeight surrogatesGen ]

    where
    highFirst = 0xD800
    highLast = 0xDBFF
    lowFirst = 0xDC00
    lowLast = 0xDFFF
    maxCP = 65535

    -- Int -> CodePoint conversion, with `bottom` / `top` supplied as the
    -- defaults to `toEnumWithDefaults`.
    toCP = toEnumWithDefaults bottom top

    -- Fraction of the values in `0 .. maxCP` that lie in either surrogate range.
    surrogateCount = (highLast - highFirst + 1) + (lowLast - lowFirst + 1)
    surrogateWeight = toNumber surrogateCount / toNumber (maxCP + 1)

    -- True when `n` lies strictly outside the inclusive range `lo .. hi`.
    outside :: Int -> Int -> Int -> Boolean
    outside lo hi n = n < lo || n > hi

    notSurrogate :: Int -> Boolean
    notSurrogate n = outside highFirst highLast n && outside lowFirst lowLast n

    -- A pair must be a high surrogate followed by a low surrogate.
    surrogatesGen = do
      hi <- chooseInt highFirst highLast
      lo <- chooseInt lowFirst lowLast
      pure $ Surrogates (toCP hi) (toCP lo)

    -- Rejection-sample the range until the value is not a surrogate.
    normalGen = Normal <<< toCP <$> (chooseInt 0 maxCP `suchThat` notSurrogate)

-- | Converts the given `Int` with `mkQuotient` and passes the result to the
-- | supplied function.
withOctet :: ∀ a. (Octet -> a) -> Int -> a
withOctet f n = f (mkQuotient n)
Expand Down