@@ -11,63 +11,13 @@ var fork = require('child_process').fork,
11
11
through = require ( 'through2' ) ,
12
12
concat = require ( 'gulp-concat' ) ,
13
13
jsdoc = require ( 'gulp-jsdoc-to-markdown' ) ,
14
- insert = require ( 'gulp-insert' ) ;
14
+ insert = require ( 'gulp-insert' ) ,
15
+ generateNamedEntityData = require ( './scripts/generate_named_entity_data' ) ,
16
+ generateParserFeedbackTest = require ( './scripts/generate_parser_feedback_test' ) ;
15
17
16
18
17
// Downloads the WHATWG named character reference table (entities.json) and
// writes a generated trie module to lib/tokenizer/named_entity_trie.js.
gulp.task('generate-trie', function () {
    /**
     * Builds a trie from the entity table, keyed by the UTF-16 char codes of
     * each entity name (with the leading '&' removed).
     *
     * Node layout: { <charCode>: { c: <codepoints>, l: { ...child nodes } } }
     *   - `c` is set on the node where an entity name ends;
     *   - `l` holds the next level for names that continue past this char.
     *
     * @param {Object} entitiesData - map of entity name to a record exposing `codepoints`.
     * @returns {Object} the root level of the trie.
     */
    function createTrie(entitiesData) {
        var trie = {};

        Object.keys(entitiesData).forEach(function (entity) {
            var codepoints = entitiesData[entity].codepoints,
                name       = entity.replace(/^&/, ''),
                lastIdx    = name.length - 1,
                level      = trie;

            for (var i = 0; i <= lastIdx; i++) {
                var code = name.charCodeAt(i),
                    node = level[code] || (level[code] = {});

                if (i === lastIdx)
                    node.c = codepoints;
                else
                    level = node.l || (node.l = {});
            }
        });

        return trie;
    }

    /**
     * through2 object-mode transform: replaces the downloaded JSON with the
     * source text of a CommonJS module exporting the trie.
     */
    function trieCodeGen(file, encoding, callback) {
        var entitiesData;

        // Report malformed downloads through the stream callback instead of
        // letting the exception escape the transform.
        try {
            entitiesData = JSON.parse(file.contents.toString());
        }
        catch (err) {
            callback(err);
            return;
        }

        // All double quotes are stripped from the serialized trie, so the keys
        // are emitted unquoted. NOTE(review): this assumes `codepoints` holds
        // no string values - confirm against entities.json.
        var trieLiteral = JSON.stringify(createTrie(entitiesData)).replace(/"/g, ''),
            out         = '\'use strict\';\n\n' +
                          '//NOTE: this file contains auto-generated trie structure that is used for named entity references consumption\n' +
                          '//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tokenizing-character-references and\n' +
                          '//http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html#named-character-references)\n' +
                          'module.exports = ' + trieLiteral + ';\n';

        // `new Buffer(string)` is deprecated; Buffer.from is the supported form.
        file.contents = Buffer.from(out);

        callback(null, file);
    }

    return download('https://html.spec.whatwg.org/multipage/entities.json')
        .pipe(through.obj(trieCodeGen))
        .pipe(rename('named_entity_trie.js'))
        .pipe(gulp.dest('lib/tokenizer'));
});
69
-
70
- gulp . task ( 'generate-api-reference' , function ( ) {
19
+ // Docs
20
+ gulp . task ( 'update-api-reference' , function ( ) {
71
21
return gulp
72
22
. src ( 'lib/**/*.js' )
73
23
. pipe ( concat ( '05_api_reference.md' ) )
@@ -76,13 +26,15 @@ gulp.task('generate-api-reference', function () {
76
26
. pipe ( gulp . dest ( 'docs' ) ) ;
77
27
} ) ;
78
28
79
// Concatenates every Markdown file in docs/ into docs/build/index.md.
// Declares 'update-api-reference' as a dependency; that task writes
// 05_api_reference.md into docs/. The dependency name must match the
// registered task name exactly (no embedded whitespace), otherwise gulp
// cannot resolve it.
gulp.task('docs', ['update-api-reference'], function () {
    return gulp
        .src('docs/*.md')
        .pipe(concat('index.md'))
        .pipe(gulp.dest('docs/build'));
});
85
35
36
+
37
+ // Benchmarks
86
38
gulp . task ( 'install-upstream-parse5' , function ( ) {
87
39
return gulp
88
40
. src ( 'test/benchmark/package.json' )
@@ -112,99 +64,57 @@ gulp.task('named-entity-data-memory-benchmark', function (done) {
112
64
fork ( './test/memory_benchmark/named_entity_data' ) . once ( 'close' , done ) ;
113
65
} ) ;
114
66
67
+
68
+ // Test
115
69
// Runs ESLint over the library, tests, helper scripts and this Gulpfile,
// prints the formatted report and fails the task if any error was reported.
gulp.task('lint', function () {
    var lintedSources = [
        'lib/**/*.js',
        'test/**/*.js',
        'scripts/**/*.js',
        'Gulpfile.js'
    ];

    var linted = gulp.src(lintedSources).pipe(eslint());

    return linted
        .pipe(eslint.format())
        .pipe(eslint.failAfterError());
});
126
81
127
- gulp . task ( 'update-feedback-tests' , function ( ) {
128
- var Parser = require ( './lib/Parser' ) ;
129
- var Tokenizer = require ( './lib/tokenizer' ) ;
130
- var defaultTreeAdapter = require ( './lib/tree_adapters/default' ) ;
131
- var testUtils = require ( './test/test_utils' ) ;
132
-
133
/**
 * Appends `token` to `dest`, merging runs of character tokens.
 *
 * - EOF tokens are not stored; the function returns false for them.
 * - NULL-character and whitespace-character tokens are retyped (in place)
 *   to plain character tokens.
 * - A character token following another character token has its `chars`
 *   appended to the previous entry instead of being pushed separately.
 *
 * @param {Array} dest - accumulated token list (mutated).
 * @param {Object} token - token to append (its `type` may be rewritten).
 * @returns {boolean} false for EOF, true otherwise.
 */
function appendToken(dest, token) {
    if (token.type === Tokenizer.EOF_TOKEN)
        return false;

    var isSpecialCharToken = token.type === Tokenizer.NULL_CHARACTER_TOKEN ||
                             token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN;

    if (isSpecialCharToken)
        token.type = Tokenizer.CHARACTER_TOKEN;

    if (token.type === Tokenizer.CHARACTER_TOKEN && dest.length > 0) {
        var previous = dest[dest.length - 1];

        if (previous.type === Tokenizer.CHARACTER_TOKEN) {
            previous.chars += token.chars;
            return true;
        }
    }

    dest.push(token);
    return true;
}
151
-
152
/**
 * Parses `html` with a fresh Parser and records every token the parser
 * processes, returning the list mapped through
 * testUtils.convertTokenToHtml5Lib.
 *
 * @param {string} html - markup to parse.
 * @returns {Array} recorded tokens after conversion.
 */
function collectParserTokens(html) {
    var recorded = [],
        parser   = new Parser();

    // Shadow the prototype method on this instance so each token can be
    // captured right after the parser has handled it.
    parser._processInputToken = function recordToken(token) {
        Parser.prototype._processInputToken.call(this, token);

        // Needed to split attributes of duplicate <html> and <body>
        // which are otherwise merged as per tree constructor spec
        var isStartTag = token.type === Tokenizer.START_TAG_TOKEN;

        if (isStartTag)
            token.attrs = token.attrs.slice();

        appendToken(recorded, token);
    };

    parser.parse(html);

    return recorded.map(testUtils.convertTokenToHtml5Lib);
}
82
// Runs the mocha fixtures in test/fixtures; 'lint' is declared as a dependency.
gulp.task('test', ['lint'], function () {
    // NOTE: disable timeouts in debug
    var isDebugging  = typeof v8debug !== 'undefined',
        mochaOptions = {
            ui:       'exports',
            reporter: 'progress',
            timeout:  isDebugging ? Infinity : 20000
        };

    return gulp
        .src('test/fixtures/*_test.js')
        .pipe(mocha(mochaOptions));
});
91
+
171
92
93
+ // Scripts
94
// Regenerates the parser feedback fixtures: every tree construction .dat file
// is passed through generateParserFeedbackTest and written to
// test/data/parser_feedback with a .test extension.
gulp.task('update-feedback-tests', function () {
    // through2 object-mode transform that swaps the file contents for the
    // generated test.
    function regenerate(file, encoding, callback) {
        var test;

        // Hand generator failures to the stream callback rather than letting
        // the exception escape the transform.
        try {
            test = generateParserFeedbackTest(file.contents.toString());
        }
        catch (err) {
            callback(err);
            return;
        }

        // `new Buffer(...)` is deprecated; Buffer.from is the supported form.
        // NOTE(review): presumably the generator returns a string - confirm.
        file.contents = Buffer.from(test);

        callback(null, file);
    }

    return gulp
        .src(['test/data/tree_construction/*.dat', 'test/data/tree_construction_regression/*.dat'])
        .pipe(through.obj(regenerate))
        .pipe(rename({ extname: '.test' }))
        .pipe(gulp.dest('test/data/parser_feedback'));
});
201
107
202
- gulp . task ( 'test' , [ 'lint' ] , function ( ) {
203
- return gulp
204
- . src ( 'test/fixtures/*_test.js' )
205
- . pipe ( mocha ( {
206
- ui : 'exports' ,
207
- reporter : 'progress' ,
208
- timeout : typeof v8debug === 'undefined' ? 20000 : Infinity // NOTE: disable timeouts in debug
209
- } ) ) ;
108
+
109
// Downloads the WHATWG named character reference table (entities.json),
// passes the parsed JSON to generateNamedEntityData and writes the result to
// lib/tokenizer/named_entity_data.js.
gulp.task('update-named-entities-data', function () {
    // through2 object-mode transform that swaps the downloaded JSON for the
    // generated module contents.
    function regenerate(file, encoding, callback) {
        var generated;

        // The payload comes from the network: report malformed JSON (or a
        // generator failure) through the stream callback instead of letting
        // the exception escape the transform.
        try {
            generated = generateNamedEntityData(JSON.parse(file.contents.toString()));
        }
        catch (err) {
            callback(err);
            return;
        }

        // `new Buffer(...)` is deprecated; Buffer.from is the supported form.
        // NOTE(review): presumably the generator returns a string - confirm.
        file.contents = Buffer.from(generated);

        callback(null, file);
    }

    return download('https://html.spec.whatwg.org/multipage/entities.json')
        .pipe(through.obj(regenerate))
        .pipe(rename('named_entity_data.js'))
        .pipe(gulp.dest('lib/tokenizer'));
});
0 commit comments