@@ -30,7 +30,7 @@ const Renderer = @import("renderer.zig").Renderer;
30
30
const Window = @import ("html/window.zig" ).Window ;
31
31
const Walker = @import ("dom/walker.zig" ).WalkerDepthFirst ;
32
32
const Scheduler = @import ("Scheduler.zig" );
33
- const HttpClient = @import ("../http/Client .zig" );
33
+ const Http = @import ("../http/Http .zig" );
34
34
const ScriptManager = @import ("ScriptManager.zig" );
35
35
const HTMLDocument = @import ("html/document.zig" ).HTMLDocument ;
36
36
@@ -87,13 +87,23 @@ pub const Page = struct {
87
87
polyfill_loader : polyfill.Loader = .{},
88
88
89
89
scheduler : Scheduler ,
90
- http_client : * HttpClient ,
90
+ http_client : * Http.Client ,
91
91
script_manager : ScriptManager ,
92
92
93
93
mode : Mode ,
94
94
95
95
load_state : LoadState = .parsing ,
96
96
97
+ // Page.wait balances waiting for resources / tasks and producing an output.
98
+ // Up until a timeout, Page.wait will always wait for inflight or pending
99
+ // HTTP requests, via the Http.Client.active counter. However, intercepted
100
+ // requests (via CDP, but it could be anything), aren't considered "active"
101
+ // connections. So it's possible that we have intercepted requests (which are
102
+ // pending on some driver to continue/abort) while Http.Client.active == 0.
103
+ // This boolean exists to supplement Http.Client.active and inform Page.wait
104
+ // of pending connections.
105
+ request_intercepted : bool = false ,
106
+
97
107
const Mode = union (enum ) {
98
108
pre : void ,
99
109
err : anyerror ,
@@ -275,16 +285,26 @@ pub const Page = struct {
275
285
while (true ) {
276
286
SW : switch (self .mode ) {
277
287
.pre , .raw = > {
288
+ if (self .request_intercepted ) {
289
+ // the page request was intercepted.
290
+
291
+ // there shouldn't be any active requests;
292
+ std .debug .assert (http_client .active == 0 );
293
+
294
+ // nothing we can do for this, need to kick the can up
295
+ // the chain and wait for activity (e.g. a CDP message)
296
+ // to unblock this.
297
+ return ;
298
+ }
299
+
278
300
// The main page hasn't started/finished navigating.
279
301
// There's no JS to run, and no reason to run the scheduler.
280
-
281
302
if (http_client .active == 0 ) {
282
303
// haven't started navigating, I guess.
283
304
return ;
284
305
}
285
306
286
307
// There should only be 1 active http transfer, the main page
287
- std .debug .assert (http_client .active == 1 );
288
308
try http_client .tick (ms_remaining );
289
309
},
290
310
.html , .parsed = > {
@@ -330,20 +350,35 @@ pub const Page = struct {
330
350
331
351
_ = try scheduler .runLowPriority ();
332
352
333
- // We'll block here, waiting for network IO. We know
334
- // when the next timeout is scheduled, and we know how long
335
- // the caller wants to wait for, so we can pick a good wait
336
- // duration
337
- const ms_to_wait = @min (ms_remaining , ms_to_next_task orelse 1000 );
353
+ const request_intercepted = self .request_intercepted ;
354
+
355
+ // We want to prioritize processing intercepted requests
356
+ // because, the sooner they get unblocked, the sooner we
357
+ // can start the HTTP request. But we still want to advance
358
+ // existing HTTP requests, if possible. So, if we have
359
+ // intercepted requests, we'll still look at existing HTTP
360
+ // requests, but we won't block waiting for more data.
361
+ const ms_to_wait =
362
+ if (request_intercepted ) 0
363
+
364
+ // But if we have no intercepted requests, we'll wait
365
+ // for as long as we can for data to our existing
366
+ // inflight requests
367
+ else @min (ms_remaining , ms_to_next_task orelse 1000 );
368
+
338
369
try http_client .tick (ms_to_wait );
339
370
340
- if (try_catch .hasCaught ()) {
341
- const msg = (try try_catch .err (self .arena )) orelse "unknown" ;
342
- log .warn (.user_script , "page wait" , .{ .err = msg , .src = "data" });
343
- return error .JsError ;
371
+ if (request_intercepted ) {
372
+ // Again, prioritizing intercepted requests. Exit this
373
+ // loop so that our caller can hopefully resolve them
374
+ // (i.e. continue or abort them);
375
+ return ;
344
376
}
345
377
},
346
- .err = > | err | return err ,
378
+ .err = > | err | {
379
+ self .mode = .{ .raw_done = @errorName (err ) };
380
+ return err ;
381
+ },
347
382
.raw_done = > return ,
348
383
}
349
384
@@ -362,7 +397,7 @@ pub const Page = struct {
362
397
std .debug .print ("\n active requests: {d}\n " , .{self .http_client .active });
363
398
var n_ = self .http_client .handles .in_use .first ;
364
399
while (n_ ) | n | {
365
- const transfer = HttpClient .Transfer .fromEasy (n .data .conn .easy ) catch | err | {
400
+ const transfer = Http .Transfer .fromEasy (n .data .conn .easy ) catch | err | {
366
401
std .debug .print (" - failed to load transfer: {any}\n " , .{err });
367
402
break ;
368
403
};
@@ -435,7 +470,7 @@ pub const Page = struct {
435
470
is_http : bool = true ,
436
471
is_navigation : bool = false ,
437
472
};
438
- pub fn requestCookie (self : * const Page , opts : RequestCookieOpts ) HttpClient .RequestCookie {
473
+ pub fn requestCookie (self : * const Page , opts : RequestCookieOpts ) Http.Client .RequestCookie {
439
474
return .{
440
475
.jar = self .cookie_jar ,
441
476
.origin = & self .url .uri ,
@@ -473,7 +508,7 @@ pub const Page = struct {
473
508
const owned_url = try self .arena .dupeZ (u8 , request_url );
474
509
self .url = try URL .parse (owned_url , null );
475
510
476
- var headers = try HttpClient .Headers .init ();
511
+ var headers = try Http .Headers .init ();
477
512
if (opts .header ) | hdr | try headers .add (hdr );
478
513
try self .requestCookie (.{ .is_navigation = true }).headersForRequest (self .arena , owned_url , & headers );
479
514
@@ -484,6 +519,7 @@ pub const Page = struct {
484
519
.headers = headers ,
485
520
.body = opts .body ,
486
521
.cookie_jar = self .cookie_jar ,
522
+ .resource_type = .document ,
487
523
.header_done_callback = pageHeaderDoneCallback ,
488
524
.data_callback = pageDataCallback ,
489
525
.done_callback = pageDoneCallback ,
@@ -563,7 +599,7 @@ pub const Page = struct {
563
599
);
564
600
}
565
601
566
- fn pageHeaderDoneCallback (transfer : * HttpClient .Transfer ) ! void {
602
+ fn pageHeaderDoneCallback (transfer : * Http .Transfer ) ! void {
567
603
var self : * Page = @alignCast (@ptrCast (transfer .ctx ));
568
604
569
605
// would be different than self.url in the case of a redirect
@@ -578,7 +614,7 @@ pub const Page = struct {
578
614
});
579
615
}
580
616
581
- fn pageDataCallback (transfer : * HttpClient .Transfer , data : []const u8 ) ! void {
617
+ fn pageDataCallback (transfer : * Http .Transfer , data : []const u8 ) ! void {
582
618
var self : * Page = @alignCast (@ptrCast (transfer .ctx ));
583
619
584
620
if (self .mode == .pre ) {
@@ -1002,7 +1038,7 @@ pub const NavigateReason = enum {
1002
1038
pub const NavigateOpts = struct {
1003
1039
cdp_id : ? i64 = null ,
1004
1040
reason : NavigateReason = .address_bar ,
1005
- method : HttpClient .Method = .GET ,
1041
+ method : Http .Method = .GET ,
1006
1042
body : ? []const u8 = null ,
1007
1043
header : ? [:0 ]const u8 = null ,
1008
1044
};
0 commit comments