diff --git a/chromedp/ri/main.go b/chromedp/ri/main.go
new file mode 100644
index 0000000..3d5a8ce
--- /dev/null
+++ b/chromedp/ri/main.go
@@ -0,0 +1,188 @@
+// Copyright 2023-2025 Lightpanda (Selecy SAS)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package main
+
+import (
+	"context"
+	"encoding/base64"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"log/slog"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/chromedp/cdproto/cdp"
+	"github.com/chromedp/cdproto/fetch"
+	"github.com/chromedp/chromedp"
+)
+
+const (
+	exitOK   = 0
+	exitFail = 1
+)
+
+// main creates a cancelable context and runs the program.
+func main() {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	err := run(ctx, os.Args, os.Stdout, os.Stderr)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err.Error())
+		os.Exit(exitFail)
+	}
+
+	os.Exit(exitOK)
+}
+
+const (
+	// CdpWSDefault is the default CDP websocket address to connect to.
+	CdpWSDefault = "ws://127.0.0.1:9222"
+)
+
+// run parses args, connects to the browser over CDP, answers requests for
+// /reviews.json with a canned body and checks that the page at the given url
+// renders that altered review. Regular output is written to stdout.
+func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
+	// declare runtime flag parameters.
+	flags := flag.NewFlagSet(args[0], flag.ExitOnError)
+	flags.SetOutput(stderr)
+
+	var (
+		verbose = flags.Bool("verbose", false, "enable debug log level")
+		cdpws   = flags.String("cdp", env("CDPCLI_WS", CdpWSDefault), "cdp ws to connect")
+	)
+
+	// usage func declaration.
+	exec := args[0]
+	flags.Usage = func() {
+		fmt.Fprintf(stderr, "usage: %s [options] <url>\n", exec)
+		fmt.Fprintf(stderr, "chromedp fetch an url and intercept requests.\n")
+		fmt.Fprintf(stderr, "\nCommand line options:\n")
+		flags.PrintDefaults()
+		fmt.Fprintf(stderr, "\nEnvironment vars:\n")
+		fmt.Fprintf(stderr, "\tCDPCLI_WS\tdefault %s\n", CdpWSDefault)
+	}
+	if err := flags.Parse(args[1:]); err != nil {
+		return err
+	}
+
+	if *verbose {
+		slog.SetLogLoggerLevel(slog.LevelDebug)
+	}
+
+	args = flags.Args()
+	if len(args) != 1 {
+		return errors.New("url is required")
+	}
+	url := args[0]
+
+	ctx, cancel := chromedp.NewRemoteAllocator(ctx,
+		*cdpws, chromedp.NoModifyURL,
+	)
+	defer cancel()
+
+	// build context options
+	var opts []chromedp.ContextOption
+	if *verbose {
+		opts = append(opts, chromedp.WithDebugf(log.Printf))
+	}
+
+	ctx, cancel = chromedp.NewContext(ctx, opts...)
+	defer cancel()
+
+	// ensure the first tab is created
+	if err := chromedp.Run(ctx); err != nil {
+		return fmt.Errorf("new tab: %w", err)
+	}
+
+	chromedp.ListenTarget(ctx, func(ev any) {
+		switch ev := ev.(type) {
+		case *fetch.EventRequestPaused:
+			go func() {
+				reqURL := ev.Request.URL
+				fmt.Fprintf(stdout, "%s %s\n", ev.RequestID, reqURL)
+
+				// alter the response with a new body (best effort, errors ignored)
+				if strings.HasSuffix(reqURL, "/reviews.json") {
+					encoded := base64.StdEncoding.EncodeToString([]byte(`["alter review"]`))
+					_ = chromedp.Run(ctx,
+						fetch.FulfillRequest(ev.RequestID, 200).WithBody(encoded),
+					)
+					return
+				}
+
+				// by default let the request continue (best effort, errors ignored).
+				_ = chromedp.Run(ctx, fetch.ContinueRequest(ev.RequestID))
+			}()
+		}
+	})
+
+	if err := chromedp.Run(ctx,
+		fetch.Enable().WithPatterns(nil),
+	); err != nil {
+		return fmt.Errorf("fetch enable: %w", err)
+	}
+
+	err := chromedp.Run(ctx, chromedp.Navigate(url))
+	if err != nil {
+		return fmt.Errorf("navigate %s: %w", url, err)
+	}
+
+	var nodes []*cdp.Node
+	if err := chromedp.Run(ctx,
+		chromedp.Nodes(`#product-reviews > div > p`, &nodes,
+			chromedp.Populate(1, false,
+				chromedp.PopulateWait(50*time.Millisecond),
+			),
+		),
+	); err != nil {
+		return fmt.Errorf("get reviews: %w", err)
+	}
+
+	reviews := make([]string, 0, len(nodes))
+	for _, node := range nodes {
+		if len(node.Children) != 1 {
+			// should not happen, but it will be caught by the following
+			// asserts.
+			continue
+		}
+		reviews = append(reviews, node.Children[0].NodeValue)
+	}
+
+	fmt.Fprintf(stdout, "%v\n", reviews)
+
+	if len(reviews) != 1 {
+		return errors.New("invalid reviews number")
+	}
+	if reviews[0] != "alter review" {
+		return errors.New("invalid reviews title")
+	}
+
+	return nil
+}
+
+// env returns the env value corresponding to the key or the default string.
+func env(key, dflt string) string {
+	val, ok := os.LookupEnv(key)
+	if !ok {
+		return dflt
+	}
+
+	return val
+}
diff --git a/playwright/connect.js b/playwright/connect.js
index 5ae5904..c9fe854 100644
--- a/playwright/connect.js
+++ b/playwright/connect.js
@@ -21,11 +21,6 @@ const browserAddress = process.env.BROWSER_ADDRESS ? process.env.BROWSER_ADDRESS
 // web serveur url
 const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'http://127.0.0.1:1234';
 
-// measure general time.
-const gstart = process.hrtime.bigint();
-// store all run durations
-let metrics = [];
-
 // Connect to an existing browser
 console.log("Connection to browser on " + browserAddress);
 const browser = await chromium.connectOverCDP({
@@ -36,7 +31,6 @@ const browser = await chromium.connectOverCDP({
   }
 });
 
-
 const context = await browser.newContext({
   baseURL: baseURL,
 });
diff --git a/playwright/request_interception.js b/playwright/request_interception.js
new file mode 100644
index 0000000..42daea3
--- /dev/null
+++ b/playwright/request_interception.js
@@ -0,0 +1,104 @@
+// Copyright 2023-2024 Lightpanda (Selecy SAS)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Import the Chromium browser into our scraper.
+import { chromium } from 'playwright';
+
+// CDP address of the browser to connect to
+const browserAddress = process.env.BROWSER_ADDRESS ? process.env.BROWSER_ADDRESS : 'ws://127.0.0.1:9222';
+
+// web server URL
+const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'https://doesnotexist.localhost:9832';
+
+// Connect to an existing browser
+console.log("Connection to browser on " + browserAddress);
+const browser = await chromium.connectOverCDP({
+  endpointURL: browserAddress,
+  logger: {
+    isEnabled: (name, severity) => true,
+    log: (name, severity, message, args) => console.log(`${name} ${message}`)
+  }
+});
+
+const context = await browser.newContext({
+  baseURL: baseURL,
+});
+
+const page = await context.newPage();
+// Intercept every request: rewrite the two expected URLs, abort anything else.
+await page.route('**', async (route, request) => {
+  const url = request.url();
+  if (url === new URL('/nope/', baseURL).href) {
+    return route.continue({
+      url: "https://httpbin.io/xhr/post",
+    });
+  }
+  if (url === 'https://httpbin.io/post') {
+    return route.continue({
+      method: 'POST',
+      url: 'https://HTTPBIN.io/post',
+      headers: {'pw-injected': 'great', 'content-type': 'application/x-www-form-urlencoded'},
+      postData: 'over=9000&tea=keemun',
+    });
+  }
+
+  console.error("unexpected request: ", url);
+  return route.abort();
+});
+await page.goto('/nope/');
+
+await page.waitForSelector('#response', {timeout: 5000});
+const response = await page.locator('#response').textContent();
+const data = JSON.parse(response);
+
+if (data.url !== 'http://HTTPBIN.io/post') {
+  console.log(data.url);
+  throw new Error("Expected URL to be 'http://HTTPBIN.io/post'");
+}
+
+if (data.headers['Pw-Injected'] !== 'great') {
+  console.log(data.headers);
+  throw new Error("Expected 'Pw-Injected: great' header");
+}
+
+if (data.headers['Content-Type'] !== 'application/x-www-form-urlencoded') {
+  console.log(data.headers);
+  throw new Error("Expected 'Content-Type: application/x-www-form-urlencoded' header");
+}
+
+if (data.headers['User-Agent'] !== 'Lightpanda/1.0') {
+  console.log(data.headers);
+  throw new Error("Expected 'User-Agent: Lightpanda/1.0' header");
+}
+
+if (Object.keys(data.form).length !== 2) {
+  console.log(data.form);
+  throw new Error("Expected 2 form field");
+}
+
+if (data.form['over'] !== '9000') {
+  console.log(data.form);
+  throw new Error("Expected form field 'over: 9000'");
+}
+
+if (data.form['tea'] !== 'keemun') {
+  console.log(data.form);
+  throw new Error("Expected form field 'tea: keemun'");
+}
+
+await page.close();
+await context.close();
+
+// Turn off the browser to clean up after ourselves.
+await browser.close();
diff --git a/public/campfire-commerce/script.js b/public/campfire-commerce/script.js
index 446037d..8f7d42e 100644
--- a/public/campfire-commerce/script.js
+++ b/public/campfire-commerce/script.js
@@ -6,13 +6,16 @@
   const detailsXHR = new XMLHttpRequest();
   // blocked by https://github.com/lightpanda-io/browsercore/issues/186
   // detailsXHR.open('GET', 'json/product.json');
-detailsXHR.open('GET', document.URL + 'json/product.json');
+  detailsXHR.open('GET', document.URL + 'json/product.json');
   detailsXHR.responseType = 'json';
   detailsXHR.onload = function() {
     if (this.status === 200) {
       updateProductInfo(this.response);
     }
   };
+  detailsXHR.onabort = function(err) {
+    document.getElementById('product-description').innerHTML = 'xhr: aborted';
+  };
   detailsXHR.send();
 
   // use fetch to retrieve reviews.
diff --git a/puppeteer/request_interception.js b/puppeteer/request_interception.js
new file mode 100644
index 0000000..51bad55
--- /dev/null
+++ b/puppeteer/request_interception.js
@@ -0,0 +1,94 @@
+// Copyright 2023-2024 Lightpanda (Selecy SAS)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+'use strict';
+
+import puppeteer from 'puppeteer-core';
+
+// ws address
+const browserAddress = process.env.BROWSER_ADDRESS ? process.env.BROWSER_ADDRESS : 'ws://127.0.0.1:9222';
+
+// web server URL
+const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'http://127.0.0.1:1234';
+
+
+(async () => {
+  // Connect to the browser (websocket endpoint or HTTP URL) and open a new blank page
+  let opts = {};
+  if (browserAddress.startsWith('ws://')) {
+    opts.browserWSEndpoint = browserAddress;
+  } else {
+    opts.browserURL = browserAddress;
+  }
+
+  const browser = await puppeteer.connect(opts);
+  const context = await browser.createBrowserContext();
+  const page = await context.newPage();
+
+  await page.setRequestInterception(true);
+  page.on('request', req => { // stub reviews.json, abort product.json, continue the rest
+    if (req.isInterceptResolutionHandled()) return;
+
+    const url = req.url();
+    if (url.endsWith('reviews.json')) {
+      return req.respond({
+        ok: true,
+        status: 200,
+        contentType: 'application/json',
+        body: `["over 9000!"]`,
+      });
+    }
+
+    if (url.endsWith('product.json')) {
+      return req.abort();
+    }
+
+    req.continue();
+  });
+  // Navigate the page to a URL
+  await page.goto(baseURL + '/campfire-commerce/');
+
+  await page.waitForFunction(() => {
+    const desc = document.querySelector('#product-description');
+    return desc !== null && desc.textContent.length > 0;
+  }, {timeout: 100}); // timeout 100ms
+
+  // ensure the reviews are loaded.
+  await page.waitForFunction(() => {
+    const reviews = document.querySelectorAll('#product-reviews > div');
+    return reviews.length > 0;
+  }, {timeout: 100}); // timeout 100ms
+
+  let res = {};
+
+  res.desc = await page.evaluate(() => { return document.querySelector('#product-description').textContent; });
+  res.reviews = await page.evaluate(() => {
+    const r = document.querySelectorAll('#product-reviews > div > p');
+    return Array.from(r).map((n) => n.textContent);
+  });
+
+  // assertions
+  if (res.desc !== 'xhr: aborted') {
+    console.log(res);
+    throw new Error("invalid product description");
+  }
+
+  if (res.reviews.length !== 1 || res.reviews[0] !== 'over 9000!') {
+    console.log(res);
+    throw new Error("invalid reviews");
+  }
+
+  await page.close();
+  await context.close();
+  await browser.disconnect();
+})();
diff --git a/runner/main.go b/runner/main.go
index ed4453d..bc3321f 100644
--- a/runner/main.go
+++ b/runner/main.go
@@ -106,14 +106,17 @@ func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
 		{Bin: "node", Args: []string{"puppeteer/location_write.js"}},
 		{Bin: "node", Args: []string{"puppeteer/form.js"}},
 		{Bin: "node", Args: []string{"puppeteer/cookies.js"}},
+		{Bin: "node", Args: []string{"puppeteer/request_interception.js"}},
 		{Bin: "node", Args: []string{"playwright/connect.js"}},
 		{Bin: "node", Args: []string{"playwright/cdp.js"}, Env: []string{"RUNS=2"}},
 		{Bin: "node", Args: []string{"playwright/dump.js"}},
 		{Bin: "node", Args: []string{"playwright/links.js"}, Env: []string{"BASE_URL=http://127.0.0.1:1234/campfire-commerce/"}},
 		{Bin: "node", Args: []string{"playwright/click.js"}},
-		{Bin: "go", Args: []string{"run", "fetch/main.go", "http://127.0.0.1:1234/"}, Dir: "chromedp"},
-		{Bin: "go", Args: []string{"run", "links/main.go", "http://127.0.0.1:1234/"}, Dir: "chromedp"},
+		{Bin: "node", Args: []string{"playwright/request_interception.js"}},
+		{Bin: "go", Args: []string{"run", "fetch/main.go", "http://127.0.0.1:1234/campfire-commerce/"}, Dir: "chromedp"},
+		{Bin: "go", Args: []string{"run", "links/main.go", "http://127.0.0.1:1234/campfire-commerce/"}, Dir: "chromedp"},
 		{Bin: "go", Args: []string{"run", "click/main.go", "http://127.0.0.1:1234/"}, Dir: "chromedp"},
+		{Bin: "go", Args: []string{"run", "ri/main.go", "http://127.0.0.1:1234/campfire-commerce/"}, Dir: "chromedp"},
 	} {
 		if *verbose {
 			t.Stderr = stderr