
Rewritten lexer using Megaparsec supporting hex and bytes literals #1750


Merged
merged 42 commits into trunk from topic/lexer2
Nov 21, 2020
Changes from all commits
Commits
42 commits
185bbc4
wip on megaparsec-based lexer
pchiusano Nov 6, 2020
c6c136a
more wip
pchiusano Nov 6, 2020
dc2ec53
filled in rest of main body of lexer (untested)
pchiusano Nov 7, 2020
253472a
note to self
pchiusano Nov 7, 2020
b7a35f4
Finished draft of lexer0 function intended to match existing lexer0 f…
pchiusano Nov 10, 2020
032d468
Hooked up new lexer, debugging...
pchiusano Nov 15, 2020
796c82b
Fixed a bunch of bugs
pchiusano Nov 16, 2020
d2581fd
Fix issue - with keyword closes a handle block or a match block
pchiusano Nov 16, 2020
6789ed1
more debugging
pchiusano Nov 17, 2020
1d5506a
Fix bug in token parser.
pchiusano Nov 17, 2020
2fa8780
315 tests failing
pchiusano Nov 17, 2020
cf3d7dd
fix floating point parsing
pchiusano Nov 17, 2020
ba427c1
285 failures
pchiusano Nov 17, 2020
ad35aeb
263 failures
pchiusano Nov 17, 2020
133ee55
down to 8 lexer failures
pchiusano Nov 17, 2020
a16f4da
tweaks
pchiusano Nov 18, 2020
566afc1
6 lexer failures
pchiusano Nov 18, 2020
c86020a
down to 3 lexer failures
pchiusano Nov 18, 2020
7e9aba8
Down to 1 lexer test failure
pchiusano Nov 18, 2020
20d5353
all lexer tests passing!
pchiusano Nov 18, 2020
bdc5471
183 total test failures
pchiusano Nov 18, 2020
e9ea538
fix a bunch more failures
pchiusano Nov 18, 2020
4229695
down to 22 failures! zing
pchiusano Nov 18, 2020
3cd148f
down to 14 test failures
pchiusano Nov 19, 2020
db9bbc1
12 failures
pchiusano Nov 19, 2020
abc1982
down to 1 failure!
pchiusano Nov 19, 2020
a606fba
(old) doc parsing, all tests pass
pchiusano Nov 19, 2020
0a5c9bb
All tests and transcripts passing
pchiusano Nov 19, 2020
c17ee6d
Delete old lexer
pchiusano Nov 19, 2020
bc7a9ad
cleanup
pchiusano Nov 19, 2020
bb036cc
disabling litSeg for now since pretty-printer isn't really set up to …
pchiusano Nov 19, 2020
0803c6f
prettyprinting of bytes literals
pchiusano Nov 19, 2020
eed0c5a
update parser for bytes literals and fix lexer hex and bytes parsing bug
pchiusano Nov 19, 2020
f4a7ebe
rerun transcripts
pchiusano Nov 20, 2020
20e5676
Merge remote-tracking branch 'origin/trunk' into topic/lexer2
pchiusano Nov 20, 2020
062f4c2
Add some tests of hex and octal literals
pchiusano Nov 20, 2020
be0906b
some comments and cleanup of doc parser
pchiusano Nov 20, 2020
aacc460
working on nicer error messages
pchiusano Nov 20, 2020
b6dd546
Get rid of some backtracking that was generating bad error messages, …
pchiusano Nov 20, 2020
ca5efc3
working on removing some backtracking
pchiusano Nov 20, 2020
b1c83e2
remove some unused backtracking
pchiusano Nov 20, 2020
e8baf7e
addressing review comments from @mitchellrosen, thanks!
pchiusano Nov 20, 2020
1,015 changes: 521 additions & 494 deletions parser-typechecker/src/Unison/Lexer.hs

Large diffs are not rendered by default.
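
Since the Lexer.hs diff is collapsed here, the sketch below illustrates, in plain Megaparsec outside the Unison codebase, the three literal forms this PR teaches the lexer to accept: hex naturals (`0x…`), octal naturals (`0o…`), and bytes literals (`0xs…` followed by an even number of hex digits). The `Lit` type and parser names are invented for the example and are not the PR's actual definitions.

```haskell
-- Toy lexer for the new literal forms; illustrative only, not Unison's lexer.
module Main where

import Data.Char (digitToInt)
import Data.Void (Void)
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Char (hexDigitChar, octDigitChar, string)

type P = Parsec Void String

data Lit = Hex Integer | Octal Integer | Bytes [Word8]
  deriving Show

lit :: P Lit
lit = choice
  [ try (Bytes <$> (string "0xs" *> many bytePair))                    -- 0xs0102…
  , try (Hex . fromDigits 16 <$> (string "0x" *> some hexDigitChar))   -- 0xff
  , Octal . fromDigits 8 <$> (string "0o" *> some octDigitChar)        -- 0o777
  ]
 where
  -- The real lexer reports InvalidBytesLiteral on an odd digit count;
  -- this sketch simply stops consuming pairs instead.
  bytePair = do
    hi <- hexDigitChar
    lo <- hexDigitChar
    pure (fromIntegral (digitToInt hi * 16 + digitToInt lo))
  fromDigits base = foldl (\acc d -> acc * base + toInteger (digitToInt d)) 0

main :: IO ()
main = mapM_ (parseTest (lit <* eof)) ["0xff", "0o777", "0xs01020304"]
```

Running it prints `Hex 255`, `Octal 511`, and `Bytes [1,2,3,4]`.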

6 changes: 6 additions & 0 deletions parser-typechecker/src/Unison/Parser.hs
@@ -30,6 +30,7 @@ import Unison.Term (MatchCase (..))
import Unison.Var (Var)
import qualified Unison.Var as Var
import qualified Unison.UnisonFile as UF
import Unison.Util.Bytes (Bytes)
import Unison.Name as Name
import Unison.Names3 (Names)
import qualified Unison.Names3 as Names
@@ -380,6 +381,11 @@ numeric = queryToken getNumeric
where getNumeric (L.Numeric s) = Just s
getNumeric _ = Nothing

bytesToken :: Ord v => P v (L.Token Bytes)
bytesToken = queryToken getBytes
where getBytes (L.Bytes bs) = Just bs
getBytes _ = Nothing

sepBy :: Ord v => P v a -> P v b -> P v [b]
sepBy sep pb = P.sepBy pb sep

83 changes: 76 additions & 7 deletions parser-typechecker/src/Unison/PrintError.hs
@@ -9,7 +9,7 @@ import Unison.Prelude

import Control.Lens ((%~))
import Control.Lens.Tuple (_1, _2, _3)
import Data.List (intersperse)
import Data.List (find, intersperse)
import Data.List.Extra (nubOrd)
import qualified Data.List.NonEmpty as Nel
import qualified Data.Map as Map
@@ -888,8 +888,8 @@ _printArrowsAtPos s line column =
pattern LexerError ts e <- Just (P.Tokens (firstLexerError -> Just (ts, e)))

firstLexerError :: Foldable t => t (L.Token L.Lexeme) -> Maybe ([L.Token L.Lexeme], L.Err)
firstLexerError (toList -> ts@((L.payload -> L.Err e) : _)) = Just (ts, e)
firstLexerError _ = Nothing
firstLexerError ts =
find (const True) [ (toList ts, e) | (L.payload -> L.Err e) <- toList ts ]

prettyParseError
:: forall v
@@ -898,10 +898,79 @@ prettyParseError
-> Parser.Err v
-> Pretty ColorText
prettyParseError s = \case
P.TrivialError _ (LexerError ts (L.CloseWithoutMatchingOpen open close)) _ ->
"❗️ I found a closing " <> style ErrorSite (fromString close) <>
" here without a matching " <> style ErrorSite (fromString open) <> ".\n\n" <>
showSource s ((\t -> (rangeForToken t, ErrorSite)) <$> ts)
P.TrivialError _ (LexerError ts e) _ -> go e
where
excerpt = showSource s ((\t -> (rangeForToken t, ErrorSite)) <$> ts)
go = \case
L.CloseWithoutMatchingOpen open close ->
"I found a closing " <> style ErrorSite (fromString close) <>
" here without a matching " <> style ErrorSite (fromString open) <> ".\n\n" <>
excerpt
L.InvalidWordyId _id -> Pr.lines [
"This identifier isn't valid syntax: ", "",
excerpt,
"Here's a few examples of valid syntax: " <>
style Code "abba1', snake_case, Foo.zoink!, 🌻" ]
L.InvalidSymbolyId _id -> Pr.lines [
"This infix identifier isn't valid syntax: ", "",
excerpt,
"Here's a few valid examples: " <>
style Code "++, Float./, `List.map`" ]
L.InvalidBytesLiteral bs -> Pr.lines [
"This bytes literal isn't valid syntax: " <> style ErrorSite (fromString bs), "",
excerpt,
Pr.wrap $ "I was expecting an even number of hexidecimal characters"
<> "(one of" <> Pr.group (style Code "0123456789abcdefABCDEF" <> ")")
<> "after the" <> Pr.group (style ErrorSite "0xs" <> ".")
]
L.InvalidHexLiteral -> Pr.lines [
"This number isn't valid syntax: ", "",
excerpt,
Pr.wrap $ "I was expecting only hexidecimal characters"
<> "(one of" <> Pr.group (style Code "0123456789abcdefABCDEF" <> ")")
<> "after the" <> Pr.group (style ErrorSite "0x" <> ".")
]
L.InvalidOctalLiteral -> Pr.lines [
"This number isn't valid syntax: ", "",
excerpt,
Pr.wrap $ "I was expecting only octal characters"
<> "(one of" <> Pr.group (style Code "01234567" <> ")")
<> "after the" <> Pr.group (style ErrorSite "0o" <> ".")
]
L.InvalidShortHash h -> Pr.lines [
"Invalid hash: " <> style ErrorSite (fromString h), "",
excerpt ]
L.Both e1 e2 -> Pr.lines [go e1, "", go e2]
L.UnknownLexeme -> Pr.lines [ "I couldn't parse this.", "", excerpt ]
L.MissingFractional n -> Pr.lines [
"This number isn't valid syntax: ", "",
excerpt,
Pr.wrap $ "I was expecting some digits after the '.',"
<> "for example: " <> style Code (n <> "0")
<> "or" <> Pr.group (style Code (n <> "1e37") <> ".")
]
L.MissingExponent n -> Pr.lines [
"This number isn't valid syntax: ", "",
excerpt,
Pr.wrap $ "I was expecting some digits for the exponent,"
<> "for example: " <> Pr.group (style Code (n <> "37") <> ".")
]
L.TextLiteralMissingClosingQuote _txt -> Pr.lines [
"This text is missing a closing quote:", "",
excerpt
]
L.InvalidEscapeCharacter c -> Pr.lines [
"This isn't a valid escape character: " <> style ErrorSite [c], "",
excerpt, "",
"I only know about the following escape characters:","",
let s ch = style Code (fromString $ "\\" <> [ch])
in Pr.indentN 2 $ intercalateMap "," s (fst <$> L.escapeChars)
]
L.LayoutError -> Pr.lines [
"I found an indentation error somewhere in here:", "",
excerpt ]
L.Opaque msg -> style ErrorSite msg

P.TrivialError sp unexpected expected
-> fromString
(P.parseErrorPretty @_ @Void (P.TrivialError sp unexpected expected))
11 changes: 10 additions & 1 deletion parser-typechecker/src/Unison/TermParser.hs
@@ -41,8 +41,9 @@ import qualified Unison.Parser as Parser (seq, uniqueName)
import qualified Unison.Pattern as Pattern
import qualified Unison.Term as Term
import qualified Unison.Type as Type
import qualified Unison.Typechecker.Components as Components
import qualified Unison.TypeParser as TypeParser
import qualified Unison.Typechecker.Components as Components
import qualified Unison.Util.Bytes as Bytes
import qualified Unison.Var as Var

watch :: Show a => String -> a -> a
@@ -345,6 +346,7 @@ termLeaf =
, text
, char
, number
, bytes
, boolean
, link
, tupleOrParenthesizedTerm
@@ -900,6 +902,13 @@ block' isTop s openBlock closeBlock = do
number :: Var v => TermP v
number = number' (tok Term.int) (tok Term.nat) (tok Term.float)

bytes :: Var v => TermP v
bytes = do
b <- bytesToken
let a = ann b
pure $ Term.app a (Term.builtin a "Bytes.fromList")
(Term.seq a $ Term.nat a . fromIntegral <$> Bytes.toWord8s (L.payload b))

number'
:: Ord v
=> (L.Token Int64 -> a)
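
The `bytes` parser above turns a bytes literal into an application of the `Bytes.fromList` builtin to a sequence of Nat literals, so `0xs01020304` parses as `Bytes.fromList [1, 2, 3, 4]`. A rough standalone model of that desugaring, using a toy `Term` type and `ByteString` as stand-ins for `Unison.Term` and `Unison.Util.Bytes`:

```haskell
-- Toy model of the bytes-literal desugaring; Term is a stand-in for Unison.Term.
import qualified Data.ByteString as BS
import Data.Word (Word64)

data Term
  = Builtin String
  | Nat Word64
  | App Term Term
  | List [Term]
  deriving Show

-- Each byte of the literal becomes a Nat literal in the list argument.
desugarBytes :: BS.ByteString -> Term
desugarBytes bs =
  App (Builtin "Bytes.fromList")
      (List [ Nat (fromIntegral w) | w <- BS.unpack bs ])

main :: IO ()
main = print (desugarBytes (BS.pack [1, 2, 3, 4]))
-- App (Builtin "Bytes.fromList") (List [Nat 1,Nat 2,Nat 3,Nat 4])
```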
12 changes: 12 additions & 0 deletions parser-typechecker/src/Unison/TermPrinter.hs
@@ -36,6 +36,7 @@ import qualified Unison.Type as Type
import qualified Unison.TypePrinter as TypePrinter
import Unison.Var ( Var )
import qualified Unison.Var as Var
import qualified Unison.Util.Bytes as Bytes
import Unison.Util.Monoid ( intercalateMap )
import qualified Unison.Util.Pretty as PP
import Unison.Util.Pretty ( Pretty, ColorText )
@@ -303,6 +304,8 @@ pretty0
paren (p >= 10) $ pair `PP.hang`
PP.spaced [pretty0 n (ac 10 Normal im doc) x, fmt S.Constructor "()" ]
(TupleTerm' xs, _) -> paren True $ commaList xs
(Bytes' bs, _) ->
fmt S.BytesLiteral "0xs" <> (PP.shown $ Bytes.fromWord8s (map fromIntegral bs))
BinaryAppsPred' apps lastArg -> paren (p >= 3) $
binaryApps apps (pretty0 n (ac 3 Normal im doc) lastArg)
_ -> case (term, nonForcePred) of
@@ -1178,3 +1181,12 @@ unLamsMatch' t = case unLamsUntilDelay' t of
rhsVars = (ABT.freeVars rhs)
in Set.union guardVars rhsVars

pattern Bytes' bs <- (toBytes -> Just bs)

toBytes :: Term3 v PrintAnnotation -> Maybe [Word64]
toBytes (App' (Builtin' "Bytes.fromList") (Sequence' bs)) =
toList <$> traverse go bs
where go (Nat' n) = Just n
go _ = Nothing
toBytes _ = Nothing
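
Going the other way, the `Bytes'` pattern synonym lets `pretty0` spot a saturated `Bytes.fromList [<nat literals>]` application and print it back as a `0xs…` literal. A toy version of the same view-pattern trick, again on a stand-in `Term` type rather than the real `Unison.Term`:

```haskell
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
-- Toy version of the Bytes' view pattern; Term is a stand-in for Unison.Term.
import Data.Word (Word64)
import Text.Printf (printf)

data Term = Builtin String | Nat Word64 | App Term Term | List [Term]

-- Matches only when the term is Bytes.fromList applied to all-Nat elements.
pattern Bytes' :: [Word64] -> Term
pattern Bytes' ws <- (toBytes -> Just ws)

toBytes :: Term -> Maybe [Word64]
toBytes (App (Builtin "Bytes.fromList") (List es)) = traverse go es
  where go (Nat n) = Just n
        go _       = Nothing
toBytes _ = Nothing

prettyBytes :: Term -> Maybe String
prettyBytes (Bytes' ws) = Just ("0xs" ++ concatMap (printf "%02x") ws)
prettyBytes _           = Nothing

main :: IO ()
main = print (prettyBytes (App (Builtin "Bytes.fromList") (List [Nat 255, Nat 0])))
-- Just "0xsff00"
```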

3 changes: 2 additions & 1 deletion parser-typechecker/src/Unison/Util/Bytes.hs
@@ -3,6 +3,7 @@

module Unison.Util.Bytes where

import Data.Char
import Data.Memory.PtrMethods (memCompare, memEqual)
import Data.Monoid (Sum(..))
import Foreign.Ptr (plusPtr)
@@ -126,7 +127,7 @@ instance T.Measured (Sum Int) (View ByteString) where
measure b = Sum (B.length b)

instance Show Bytes where
show bs = show (toWord8s bs)
show bs = toWord8s (toBase16 bs) >>= \w -> [chr (fromIntegral w)]

-- Produces two lists where the chunks have the same length
alignChunks :: B.ByteArrayAccess ba => [View ba] -> [View ba] -> ([View ba], [View ba])
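
The `Show Bytes` change above swaps the old list-of-Word8s output for lowercase base16, which is why the transcript outputs further down read `0xs…`. The rendering step amounts to something like this standalone sketch; the real instance goes through `toBase16`:

```haskell
-- Standalone sketch of base16 rendering; Unison's instance uses toBase16.
import Data.Word (Word8)
import Text.Printf (printf)

showBase16 :: [Word8] -> String
showBase16 = concatMap (printf "%02x")

main :: IO ()
main = putStrLn (showBase16 [206, 145, 206, 146])  -- "ce91ce92", cf. the utf8 transcript
```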
1 change: 1 addition & 0 deletions parser-typechecker/src/Unison/Util/ColorText.hs
@@ -99,6 +99,7 @@ defaultColors :: ST.Element -> Maybe Color
defaultColors = \case
ST.NumericLiteral -> Nothing
ST.TextLiteral -> Nothing
ST.BytesLiteral -> Just HiBlack
ST.CharLiteral -> Nothing
ST.BooleanLiteral -> Nothing
ST.Blank -> Nothing
1 change: 1 addition & 0 deletions parser-typechecker/src/Unison/Util/SyntaxText.hs
@@ -13,6 +13,7 @@ type SyntaxText = AnnotatedText Element
-- The elements of the Unison grammar, for syntax highlighting purposes
data Element = NumericLiteral
| TextLiteral
| BytesLiteral
| CharLiteral
| BooleanLiteral
| Blank
3 changes: 2 additions & 1 deletion parser-typechecker/tests/Unison/Test/Lexer.hs
@@ -172,7 +172,8 @@ test =
, simpleWordyId "x"
, Close
, Open "then"
, Reserved "else"
, Close
, Open "else"
, Close
]
-- Empty `else` clause
2 changes: 1 addition & 1 deletion parser-typechecker/tests/Unison/Test/TermParser.hs
@@ -43,7 +43,7 @@ test1 = scope "termparser" . tests . map parses $
, "-1.2e+3"
, "-1.2e-3"

, "-4th"
, "-4 th"
, "()"
, "(0)"
, "forty"
7 changes: 7 additions & 0 deletions parser-typechecker/tests/Unison/Test/TermPrinter.hs
@@ -118,6 +118,13 @@ test = scope "termprinter" $ tests
, tc "if _something then _foo else _blah"
, tc "3.14159"
, tc "+0"
, tc "0xsabba1234"
, tcDiff "0x00000001" "1"
, tcDiff "+0x00001" "+1"
, tcDiff "-0x0001" "-1"
, tcDiff "0xff" "255"
, tcDiff "+0xff" "+255"
, tcDiff "0o77777777" "16777215" -- Each octal digit is 3 bits, 8 7s is 2^(8*3) - 1
, tc "\"some text\""
, tc "\"they said \\\"hi\\\"\""
, pending $ tc "\'they said \\\'hi\\\'\'" -- TODO lexer doesn't support strings with single quotes in
5 changes: 2 additions & 3 deletions unison-src/new-runtime-transcripts/utf8.output.md
@@ -35,7 +35,7 @@ ascii = "ABCDE"

4 | > toUtf8 ascii
fromList [65, 66, 67, 68, 69]
0xs4142434445

```
non-ascii characters are encoded as multiple bytes.
@@ -62,8 +62,7 @@ greek = "ΑΒΓΔΕ"

4 | > toUtf8 greek
fromList
[206, 145, 206, 146, 206, 147, 206, 148, 206, 149]
0xsce91ce92ce93ce94ce95

```
We can check that encoding and then decoding should give us back the same `Text` we started with
2 changes: 1 addition & 1 deletion unison-src/transcripts/bytesFromList.output.md
@@ -16,6 +16,6 @@ This should render as `Bytes.fromList [1,2,3,4]`, not `##Bytes.fromSequence [1,2

1 | > Bytes.fromList [1,2,3,4]
fromList [1, 2, 3, 4]
0xs01020304

```
18 changes: 6 additions & 12 deletions unison-src/transcripts/doc-formatting.output.md
@@ -82,11 +82,9 @@ commented = [:

commented : Doc
commented =
[:
example:
[: example:

-- a comment
f x = x + 1
-- a comment f x = x + 1
:]

```
@@ -291,8 +289,7 @@ doc6 = [:

doc6 : Doc
doc6 =
[:
- foo
[: - foo
- bar
and the rest.
:]
@@ -382,9 +379,8 @@ para line lorem ipsum dolor lorem ipsum dolor lorem ipsum dolor lorem ipsum dolo

test1 : Doc
test1 =
[:
The internal logic starts to get hairy when you use the \@ features,
for example referencing a name like @List.take. Internally,
[: The internal logic starts to get hairy when you use the \@
features, for example referencing a name like @List.take. Internally,
the text between each such usage is its own blob (blob ends here
--> @List.take), so paragraph reflow has to be aware of multiple
blobs to do paragraph reflow (or, more accurately, to do the
@@ -491,8 +487,7 @@ View is fine.

test2 : Doc
test2 =
[:
Take a look at this:
[: Take a look at this:
@[source] foo ▶ bar
:]

@@ -501,7 +496,6 @@ But note it's not obvious how display should best be handling this. At the mome
```ucm
.> display test2


Take a look at this:
foo n =
use Nat +
11 changes: 4 additions & 7 deletions unison-src/transcripts/docs.output.md
@@ -140,7 +140,6 @@ Now that documentation is linked to the definition. We can view it if we like:

.> display 1


`builtin.List.take n xs` returns the first `n` elements of `xs`.
(No need to add line breaks manually. The display command will
do wrapping of text for you. Indent any lines where you don't
@@ -164,7 +163,6 @@ Or there's also a convenient function, `docs`, which shows the `Doc` values that
```ucm
.> docs builtin.List.take


`builtin.List.take n xs` returns the first `n` elements of `xs`.
(No need to add line breaks manually. The display command will
do wrapping of text for you. Indent any lines where you don't
@@ -190,11 +188,10 @@ Note that if we view the source of the documentation, the various references are

docs.List.take : Doc
docs.List.take =
[:
`@builtin.List.take n xs` returns the first `n` elements of `xs`.
(No need to add line breaks manually. The display command will
do wrapping of text for you. Indent any lines where you don't
want it to do this.)
[: `@builtin.List.take n xs` returns the first `n` elements of
`xs`. (No need to add line breaks manually. The display command
will do wrapping of text for you. Indent any lines where you
don't want it to do this.)

## Examples:
