Skip to content

Commit 250ae02

Browse files
committed
Add time benchmark for SAX parsers
1 parent 829d96e commit 250ae02

File tree

5 files changed

+133
-15
lines changed

5 files changed

+133
-15
lines changed

README.md

+15-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,17 @@ stack exec generate
1212

1313
## Time
1414

15-
Conversion from `ByteString` to DOM
15+
Columns:
16+
17+
- Name - name of the package on Hackage
18+
- Language - pure Haskell or a C binding
1619

1720
``` bash
1821
stack bench
1922
```
2023

24+
Conversion from `ByteString` to DOM
25+
2126
| Name | Language | Time |
2227
|------|----------|------|
2328
| hexml | C | 1.787 ms |
@@ -26,6 +31,15 @@ stack bench
2631
| hexpat | C | 133.4 ms |
2732
| xml-conduit | Haskell | 173.7 ms |
2833

34+
Conversion from `ByteString` to a list of SAX events
35+
36+
| Name | Language | Time |
37+
|------|----------|------|
38+
| xeno | Haskell | 31.24 ms |
39+
| sax | Haskell | 34.69 ms |
40+
| hexpat | C | 74.84 ms |
41+
| xml-conduit | Haskell | 196.1 ms |
42+
2943
## Space
3044

3145
``` bash

Time.hs

+87-11
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,104 @@
1+
{-# OPTIONS_GHC -fno-warn-orphans #-}
12
{-# LANGUAGE CPP #-}
23
{-# LANGUAGE LambdaCase #-}
4+
{-# LANGUAGE DeriveGeneric #-}
35
{-# LANGUAGE TemplateHaskell #-}
46
{-# LANGUAGE TypeApplications #-}
57

68
module Main (main) where
79

10+
import Control.DeepSeq
811
import Criterion.Main
912
import Criterion.Types
1013
import Data.Book
1114
import Data.ByteString (ByteString)
1215
import qualified Data.ByteString.Lazy
16+
import qualified Data.Conduit
17+
import qualified Data.Conduit.List
1318
import Data.Default
1419
import Data.FileEmbed
20+
import GHC.Generics (Generic)
21+
import qualified SAX
22+
import qualified SAX.Streaming
23+
import qualified Streaming.Prelude
1524
import qualified Text.XML
1625
import qualified Text.XML.DOM.Parser
26+
import qualified Text.XML.Expat.SAX
1727
import qualified Text.XML.Expat.Tree
1828
import qualified Text.XML.Hexml
29+
import qualified Text.XML.Stream.Parse
1930
import qualified Xeno.DOM
31+
import qualified Xeno.SAX
2032
import qualified Xmlbf
2133
import qualified Xmlbf.Xeno
2234
#ifdef LIBXML
2335
import qualified Text.XML.LibXML
2436
#endif
2537

38+
data XenoEvent
39+
= OpenTag ByteString
40+
| EndOpenTag ByteString
41+
| CloseTag ByteString
42+
| Attribute ByteString ByteString
43+
| Text ByteString
44+
| CDATA ByteString
45+
deriving (Generic)
46+
47+
instance NFData XenoEvent
48+
2649
main :: IO ()
2750
main = defaultMainWith
2851
defaultConfig { csvFile = Just "out.csv" }
29-
[ bgroup "dom" (dom inputBs)
30-
-- , bgroup "struct" (struct inputBs)
52+
[ bgroup "sax" (sax inputBs)
53+
, bgroup "dom" (dom inputBs)
54+
, bgroup "struct" (struct inputBs)
3155
]
3256

33-
-- | Conversion from 'Data.ByteString.ByteString' to DOM
34-
dom :: Data.ByteString.ByteString -> [Benchmark]
57+
-- | Conversion from 'ByteString' to a list of SAX events
58+
sax :: ByteString -> [Benchmark]
59+
sax bs =
60+
[ bench "xeno fold list" $ nf
61+
( Xeno.SAX.fold
62+
(\v -> (:v) . OpenTag)
63+
(\v k -> (:v) . Attribute k)
64+
(\v -> (:v) . EndOpenTag)
65+
(\v -> (:v) . Text)
66+
(\v -> (:v) . CloseTag)
67+
(\v -> (:v) . CDATA)
68+
mempty )
69+
bs
70+
, bench "xeno process conduit" $ nf
71+
( \input ->
72+
let
73+
parseBs = Xeno.SAX.process
74+
(Data.Conduit.yield . OpenTag)
75+
(\k -> Data.Conduit.yield . Attribute k)
76+
(Data.Conduit.yield . EndOpenTag)
77+
(Data.Conduit.yield . Text)
78+
(Data.Conduit.yield . CloseTag)
79+
(Data.Conduit.yield . CDATA)
80+
input
81+
in case Data.Conduit.connect parseBs Data.Conduit.List.consume of
82+
Nothing -> error "Unexpected parse error"
83+
Just v -> v )
84+
bs
85+
, bench "sax" $ nf
86+
( \input -> case Streaming.Prelude.toList_ (SAX.Streaming.streamXml input) of
87+
Left _ -> error "Unexpected parse error"
88+
Right v -> v )
89+
bs
90+
, bench "hexpat" $ nf
91+
( Text.XML.Expat.SAX.parse @ByteString @ByteString Text.XML.Expat.SAX.defaultParseOptions )
92+
( Data.ByteString.Lazy.fromStrict bs )
93+
, bench "conduit" $ nf
94+
( \input -> case Data.Conduit.connect (Text.XML.Stream.Parse.parseLBS def input) Data.Conduit.List.consume of
95+
Nothing -> error "Unexpected parse error"
96+
Just v -> v )
97+
( Data.ByteString.Lazy.fromStrict bs )
98+
]
99+
100+
-- | Conversion from 'ByteString' to DOM
101+
dom :: ByteString -> [Benchmark]
35102
dom bs =
36103
[ bench "hexml" $ whnf
37104
( \input -> case Text.XML.Hexml.parse input of
@@ -59,13 +126,7 @@ dom bs =
59126
-- | Conversion from DOM to data type
60127
struct :: ByteString -> [Benchmark]
61128
struct bs =
62-
[ bench "dom-parser" $ nf
63-
( \doc -> case Text.XML.DOM.Parser.runDomParser doc (Text.XML.DOM.Parser.fromDom @Catalog) of
64-
Left _ -> error "Unexpected conversion error"
65-
Right v -> v )
66-
( Text.XML.parseLBS_ def (Data.ByteString.Lazy.fromStrict bs) )
67-
-- TODO: https://gitlab.com/k0001/xmlbf/issues/6
68-
, bench "xmlbf-xeno" $ nf
129+
[ bench "xmlbf-xeno" $ nf
69130
( \case
70131
Left _ -> error "Unexpected parse error"
71132
Right n -> case Xmlbf.Xeno.element n of
@@ -74,7 +135,22 @@ struct bs =
74135
Left e -> error e
75136
Right v -> v )
76137
( Xeno.DOM.parse inputBs )
138+
-- TODO: Fix @SAX.Fail "fail handler"@ error
139+
-- , bench "sax" $ nf
140+
-- ( \stream -> case SAX.parseSax rootSaxParser stream of
141+
-- SAX.Partial _ _ _ -> error "Unexpected conversion error: Partial"
142+
-- SAX.Done r -> r
143+
-- SAX.Fail e -> error $ "Unexpected conversion error: Fail " ++ e )
144+
-- ( SAX.Streaming.streamXml bs )
145+
, bench "dom-parser" $ nf
146+
( \doc -> case Text.XML.DOM.Parser.runDomParser doc (Text.XML.DOM.Parser.fromDom @Catalog) of
147+
Left _ -> error "Unexpected conversion error"
148+
Right v -> v )
149+
( Text.XML.parseLBS_ def (Data.ByteString.Lazy.fromStrict bs) )
77150
]
78151

79152
inputBs :: ByteString
80153
inputBs = $(embedFile "in.xml")
154+
155+
-- Assumes 'SAX.Streaming.SaxEvent' is fully strict, so WHNF already equals NF and 'rwhnf' is enough
156+
instance NFData SAX.Streaming.SaxEvent where rnf = rwhnf

haskell-perf-xml.cabal

+7-2
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,21 @@ cabal-version: >=1.10
88

99
flag libxml
1010
description: Enable libxml benchmarks
11-
default: False
11+
default: False
1212

1313
library
1414
exposed-modules: Data.Book
1515
build-depends: base
1616
, QuickCheck
17+
, bytestring
1718
, deepseq
1819
, dom-parser
20+
, sax
1921
, text
2022
, time
2123
, xml-conduit-writer
2224
, xmlbf
2325
, xmlbf-xeno
24-
, bytestring
2526
hs-source-dirs: src
2627
default-language: Haskell2010
2728
ghc-options: -Wall
@@ -64,13 +65,17 @@ benchmark time
6465
main-is: Time.hs
6566
build-depends: base
6667
, bytestring
68+
, conduit
6769
, criterion
6870
, data-default
71+
, deepseq
6972
, dom-parser
7073
, file-embed
7174
, haskell-perf-xml
7275
, hexml
7376
, hexpat
77+
, sax
78+
, streaming
7479
, xeno
7580
, xml-conduit
7681
, xmlbf

src/Data/Book.hs

+21
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ import Data.Char
1010
import Data.Fixed
1111
import Data.Text (Text)
1212
import qualified Data.Text as T
13+
import qualified Data.Text.Encoding as T
1314
import Data.Time
1415
import GHC.Generics (Generic)
16+
import qualified SAX
1517
import Test.QuickCheck
1618
import qualified Text.XML.DOM.Parser as XDP
1719
import qualified Text.XML.Writer as XW
@@ -86,6 +88,17 @@ instance Xmlbf.FromXml Book where
8688
<*> Xmlbf.pElement "publish_date" (Xmlbf.pText >>= parseTimeM True defaultTimeLocale "%F" . T.unpack)
8789
<*> Xmlbf.pElement "description" Xmlbf.pText
8890

91+
bookSaxParser :: SAX.SaxParser Book
92+
bookSaxParser = SAX.withTag "book" $ Book
93+
<$> SAX.withTag "author" saxText
94+
<*> SAX.withTag "title" saxText
95+
<*> SAX.withTag "genre" saxText
96+
<*> SAX.withTag "price" (read . T.unpack <$> saxText)
97+
<*> SAX.withTag "publish_date" (saxText >>= parseTimeM True defaultTimeLocale "%F" . T.unpack)
98+
<*> SAX.withTag "description" saxText
99+
where
100+
saxText = T.decodeUtf8 <$> SAX.bytes
101+
89102
newtype Catalog = Catalog [Book]
90103
deriving (Generic, NFData)
91104

@@ -102,9 +115,17 @@ instance Xmlbf.FromXml Catalog where
102115
fromXml = Xmlbf.pElement "catalog" $ Catalog
103116
<$> many Xmlbf.fromXml
104117

118+
catalogSaxParser :: SAX.SaxParser Catalog
119+
catalogSaxParser = SAX.withTag "catalog" $ Catalog
120+
<$> many bookSaxParser
121+
105122
newtype Root = Root Catalog
106123
deriving (Generic, NFData)
107124

108125
instance Xmlbf.FromXml Root where
109126
fromXml = Xmlbf.pElement "root" $ Root
110127
<$> Xmlbf.fromXml
128+
129+
rootSaxParser :: SAX.SaxParser Root
130+
rootSaxParser = SAX.withTag "root" $ Root
131+
<$> catalogSaxParser

stack.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,7 @@ resolver: lts-11.1
22
extra-deps:
33
- html-entities-1.1.4.2
44
- libxml-0.1.1
5-
- xmlbf-0.3
5+
- xmlbf-0.4
66
- xmlbf-xeno-0.1.1
7+
- git: https://github.com/dredozubov/sax-parser.git
8+
commit: 4eb65a8b20878311eae25a16583d4790a66400b9

0 commit comments

Comments
 (0)