From b8171f4ccf7b951bff2bfa7dea27fcde1afa7670 Mon Sep 17 00:00:00 2001
From: Devon Carew
Date: Thu, 28 Dec 2023 19:19:49 -0800
Subject: [PATCH 1/7] scrape MDN for API documentation

---
 pubspec.yaml         |   4 +
 tool/scrape_mdn.dart | 301 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 305 insertions(+)
 create mode 100644 tool/scrape_mdn.dart

diff --git a/pubspec.yaml b/pubspec.yaml
index 3812621f..85649ebe 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -13,8 +13,12 @@ dev_dependencies:
   build_runner: ^2.4.0
   build_web_compilers: ^4.0.7
   code_builder: ^4.9.0
+  collection: ^1.18.0
   dart_flutter_team_lints: ^2.0.0
   dart_style: ^2.2.4
+  html: ^0.15.0
+  http: ^1.0.0
   io: ^1.0.4
   path: ^1.8.3
+  pool: ^1.5.0
   test: ^1.22.2
diff --git a/tool/scrape_mdn.dart b/tool/scrape_mdn.dart
new file mode 100644
index 00000000..7cb17a68
--- /dev/null
+++ b/tool/scrape_mdn.dart
@@ -0,0 +1,301 @@
+// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// ignore_for_file: prefer_expression_function_bodies
+
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:html/dom.dart' as dom;
+import 'package:html/dom_parsing.dart' show TreeVisitor;
+import 'package:html/parser.dart' show parse;
+import 'package:http/http.dart' as http;
+import 'package:pool/pool.dart';
+
+const apiUrl = 'https://developer.mozilla.org/en-US/docs/Web/API';
+
+/// Scrapes the MDN Web API index page and every interface page it links to,
+/// then writes the collected documentation to `tool/mdn.json`.
+Future<void> main(List<String> args) async {
+  final client = http.Client();
+
+  try {
+    // Get the API page with all the interface references.
+    final doc = await _fetchDocument(client, apiUrl);
+
+    final section = doc.querySelector('section[aria-labelledby=interfaces]')!;
+    final anchorItems = section.querySelectorAll('li a');
+
+    final interfaceNames = <String>[];
+
+    for (final item in anchorItems) {
+      final href = item.attributes['href']!;
+      final interfaceName = href.split('/').last;
+
+      interfaceNames.add(interfaceName);
+    }
+
+    interfaceNames.sort();
+
+    print('${interfaceNames.length} items read from $apiUrl.');
+
+    // Allow at most six page fetches to be in flight at once.
+    final pool = Pool(6);
+
+    final interfaces = await pool.forEach<String, InterfaceInfo>(
+      interfaceNames,
+      (item) => populateInterfaceInfo(item, client: client),
+    ).toList();
+
+    // The pool emits results as the fetches complete rather than in input
+    // order; sort them so regenerating the file gives a stable result.
+    interfaces.sort((a, b) => a.name.compareTo(b.name));
+
+    // NOTE(review): the indent string is reproduced as it appears in the
+    // flattened patch text; confirm its width against the committed file.
+    const encoder = JsonEncoder.withIndent(' ');
+
+    final file = File('tool/mdn.json');
+    final json = {
+      '__meta__': {
+        'source':
+            '[MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web)',
+        'license':
+            '[CC-BY-SA 2.5](https://creativecommons.org/licenses/by-sa/2.5/)',
+      },
+      for (var i in interfaces) i.name: i.asJson,
+    };
+    file.writeAsStringSync('${encoder.convert(json)}\n');
+  } finally {
+    // Release the HTTP client even when a fetch or parse step throws.
+    client.close();
+  }
+}
+
+/// Downloads [url] and parses the response body as HTML.
+///
+/// Throws an [HttpException] when the response status is not 200, so a failed
+/// request is reported with its URL instead of surfacing later as a failed
+/// null check on the markup of an error page.
+Future<dom.Document> _fetchDocument(http.Client client, String url) async {
+  final uri = Uri.parse(url);
+  final response = await client.get(uri);
+  if (response.statusCode != HttpStatus.ok) {
+    throw HttpException(
+      'request failed with status ${response.statusCode}',
+      uri: uri,
+    );
+  }
+  return parse(response.body);
+}
+
+/// Fetches the MDN page for [interfaceName] and extracts its documentation.
+///
+/// The interface description comes from the first `div.section-content`
+/// inside the page's `main article`. Member documentation comes from every
+/// `<dt>` whose `id` is the lowercased interface name followed by `.` and the
+/// member name.
+Future<InterfaceInfo> populateInterfaceInfo(
+  String interfaceName, {
+  required http.Client client,
+}) async {
+  print(' $interfaceName');
+
+  final info = InterfaceInfo(name: interfaceName);
+
+  final url = '$apiUrl/$interfaceName';
+
+  // Retrieve the interface docs page.
+  final doc = await _fetchDocument(client, url);
+
+  final article = doc.querySelector('main article')!;
+  final content = article.querySelector('div[class=section-content]')!;
+
+  info.docs = '''
+${_nodesToMarkdown(content.children)}
+
+See also $url.''';
+
+  // Gather property info.
+  final idPrefix = '${interfaceName.toLowerCase()}.';
+  for (final dt in article.querySelectorAll('dt[id]')) {
+    final id = dt.attributes['id']!;
+
+    if (id.startsWith(idPrefix)) {
+      // The ids are lowercase, so the member names recorded here are too
+      // (for example `throwifaborted`).
+      final property = Property(name: id.substring(idPrefix.length));
+
+      // The description is the `<dd>` right after the `<dt>`; a term without
+      // one keeps its empty default docs.
+      final dd = dt.nextElementSibling;
+      if (dd != null && dd.localName == 'dd') {
+        property.docs = _nodesToMarkdown(dd.children);
+      }
+
+      info.properties.add(property);
+    }
+  }
+
+  info.properties.sort((a, b) => a.name.compareTo(b.name));
+
+  return info;
+}
+
+/// The documentation scraped for a single Web API interface.
+class InterfaceInfo {
+  final String name;
+
+  /// Markdown description of the interface; assigned once by
+  /// [populateInterfaceInfo].
+  late final String docs;
+
+  final List<Property> properties = [];
+
+  InterfaceInfo({required this.name});
+
+  /// The JSON form of this interface: its docs, plus a name-to-docs map of
+  /// its properties when there are any.
+  Map<String, dynamic> get asJson => {
+        'docs': docs,
+        if (properties.isNotEmpty)
+          'properties': {for (var p in properties) p.name: p.docs},
+      };
+}
+
+/// The documentation scraped for a single member of an interface.
+class Property {
+  final String name;
+
+  /// Markdown description of the member; stays empty when the page has no
+  /// description for it, so serializing never hits an unset field.
+  String docs = '';
+
+  Property({required this.name});
+}
+
+/// Converts [nodes] to markdown blocks separated by blank lines, leaving out
+/// nodes that have no markdown form.
+String _nodesToMarkdown(List<dom.Element> nodes) {
+  return nodes.map(_nodeToMarkdown).whereType<String>().join('\n\n');
+}
+
+/// Converts a block-level [node] to markdown.
+///
+/// Returns `null` for `figure` and `svg` elements, and throws for any element
+/// type that isn't handled.
+String? _nodeToMarkdown(dom.Element node) {
+  String value;
+
+  switch (node.localName) {
+    case 'p':
+      value = getTextForNote(node);
+      break;
+    case 'blockquote':
+      value = '> ${getTextForNote(node)}';
+      break;
+    case 'ul':
+    case 'ol':
+      final buf = StringBuffer();
+      // Only direct items: a nested item's text is already part of its
+      // parent item's text, so matching descendants would emit it twice.
+      for (var child in node.children.where((e) => e.localName == 'li')) {
+        buf.writeln('- ${getTextForNote(child)}');
+      }
+      value = buf.toString();
+      break;
+    case 'div':
+      if (node.classes.contains('notecard')) {
+        value = node.children
+            .map(_nodeToMarkdown)
+            .whereType<String>()
+            .join('\n');
+      } else if (node.classes.contains('code-example')) {
+        final buf = StringBuffer();
+        final pre = node.querySelector('pre')!;
+        buf.writeln('```');
+        buf.writeln(pre.text.trimRight());
+        buf.writeln('```');
+        value = buf.toString();
+      } else {
+        throw Exception('unhandled div type: ${node.classes}');
+      }
+      break;
+    case 'dl':
+      final buf = StringBuffer();
+      buf.writeln('| --- | --- |');
+      for (var child in node.children) {
+        if (child.localName == 'dt') {
+          buf.write('| ${getTextForNote(child).trim()} ');
+        } else if (child.localName == 'dd') {
+          buf.writeln('| ${getTextForNote(child).trim()} |');
+        }
+      }
+      value = buf.toString();
+      break;
+    case 'figure':
+    case 'svg':
+      return null;
+    default:
+      throw Exception('unhandled node type: ${node.localName}');
+  }
+
+  return value.trim();
+}
+
+/// Returns the text of [node] and its descendants with inline markup turned
+/// into markdown.
+String getTextForNote(dom.Element node) {
+  final visitor = MarkdownTextVisitor();
+  visitor.visit(node);
+  return visitor.toString();
+}
+
+/// Accumulates the text of a DOM subtree, rendering `strong`, `br`, `a` and
+/// `code` elements as markdown; other elements contribute their children only.
+class MarkdownTextVisitor extends TreeVisitor {
+  final StringBuffer buf = StringBuffer();
+
+  @override
+  void visitText(dom.Text node) {
+    buf.write(node.data);
+  }
+
+  @override
+  void visitElement(dom.Element node) {
+    switch (node.localName) {
+      case 'strong':
+        buf.write('**');
+        visitChildren(node);
+        buf.write('**');
+        break;
+      case 'br':
+        buf.writeln();
+        buf.writeln();
+        break;
+      case 'a':
+        // TODO(devoncarew): Fixup relative urls? Convert to symbol references?
+        final href = node.attributes['href'];
+        if (href != null && href.startsWith('https://')) {
+          buf.write('[');
+          visitChildren(node);
+          buf.write(']($href)');
+        } else {
+          visitChildren(node);
+        }
+        break;
+      case 'code':
+        buf.write('`');
+        visitChildren(node);
+        buf.write('`');
+        break;
+      default:
+        visitChildren(node);
+        break;
+    }
+  }
+
+  @override
+  String toString() => buf.toString();
+}

From 2203fb598dca204ebd9b9eb1b5a26b7fbd1c431e Mon Sep 17 00:00:00 2001
From: Devon Carew
Date: Thu, 28 Dec 2023 19:20:19 -0800
Subject: [PATCH 2/7] commit the parsed documentation information

---
 tool/mdn.json | 8653 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 8653 insertions(+)
 create mode 100644 tool/mdn.json

diff --git a/tool/mdn.json b/tool/mdn.json
new file mode 100644
index 00000000..de6ad1bf
--- /dev/null
+++ b/tool/mdn.json
@@ -0,0 +1,8653 @@
+{
+ "__meta__": {
+ "source": "[MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web)",
+ "license": "[CC-BY-SA 2.5](https://creativecommons.org/licenses/by-sa/2.5/)"
+ },
+ "ANGLE_instanced_arrays": {
+ "docs": "The **`ANGLE_instanced_arrays`** extension is part of the WebGL API and allows to draw the same object, or groups of similar objects multiple times, if they share the same vertex data, primitive count and type.\n\nWebGL extensions are available using the `WebGLRenderingContext.getExtension()` method. For more information, see also Using Extensions in the WebGL tutorial.\n\n**Note:** This extension is only available to WebGL1 contexts. In WebGL2, the functionality of this extension is available on the WebGL2 context by default and the constants and methods are available without the \"`ANGLE`\" suffix.\nDespite the name \"ANGLE\", this extension works on any device if the hardware supports it and not just on Windows when using the ANGLE library. 
\"ANGLE\" just indicates that this extension has been written by the ANGLE library authors.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/ANGLE_instanced_arrays." + }, + "AesCbcParams": { + "docs": "The **`AesCbcParams`** dictionary of the Web Crypto API represents the object that should be passed as the `algorithm` parameter into `SubtleCrypto.encrypt()`, `SubtleCrypto.decrypt()`, `SubtleCrypto.wrapKey()`, or `SubtleCrypto.unwrapKey()`, when using the AES-CBC algorithm.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AesCbcParams." + }, + "Accelerometer": { + "docs": "**Experimental:** **This is an experimental technology**\n\nCheck the Browser compatibility table carefully before using this in production.\n\nThe **`Accelerometer`** interface of the Sensor APIs provides on each reading the acceleration applied to the device along all three axes.\n\nTo use this sensor, the user must grant permission to the `'accelerometer'`, device sensor through the Permissions API.\n\nThis feature may be blocked by a Permissions Policy set on your server.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/Accelerometer.", + "properties": { + "x": "Returns a double containing the acceleration of the device along the device's x axis.", + "y": "Returns a double containing the acceleration of the device along the device's y axis.", + "z": "Returns a double containing the acceleration of the device along the device's z axis." + } + }, + "AbortController": { + "docs": "The **`AbortController`** interface represents a controller object that allows you to abort one or more Web requests as and when desired.\n\nYou can create a new `AbortController` object using the `AbortController()` constructor. Communicating with a DOM request is done using an `AbortSignal` object.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AbortController.", + "properties": { + "abort": "Aborts a DOM request before it has completed. 
This is able to abort fetch requests, consumption of any response bodies, and streams.", + "signal": "Returns an `AbortSignal` object instance, which can be used to communicate with, or to abort, a DOM request." + } + }, + "AbstractRange": { + "docs": "The **`AbstractRange`** abstract interface is the base class upon which all DOM range types are defined. A **range** is an object that indicates the start and end points of a section of content within the document.\n\n**Note:** As an abstract interface, you will not directly instantiate an object of type `AbstractRange`. Instead, you will use the `Range` or `StaticRange` interfaces. To understand the difference between those two interfaces, and how to choose which is appropriate for your needs, consult each interface's documentation.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AbstractRange." + }, + "AesGcmParams": { + "docs": "The **`AesGcmParams`** dictionary of the Web Crypto API represents the object that should be passed as the `algorithm` parameter into `SubtleCrypto.encrypt()`, `SubtleCrypto.decrypt()`, `SubtleCrypto.wrapKey()`, or `SubtleCrypto.unwrapKey()`, when using the AES-GCM algorithm.\n\nFor details of how to supply appropriate values for this parameter, see the specification for AES-GCM: [NIST SP800-38D](https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf), in particular section 5.2.1.1 on Input Data.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams." 
+ }, + "AmbientLightSensor": { + "docs": "**Experimental:** **This is an experimental technology**\n\nCheck the Browser compatibility table carefully before using this in production.\n\nThe **`AmbientLightSensor`** interface of the Sensor APIs returns the current light level or illuminance of the ambient light around the hosting device.\n\nTo use this sensor, the user must grant permission to the `'ambient-light-sensor'` device sensor through the Permissions API.\n\nThis feature may be blocked by a Permissions Policy set on your server.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AmbientLightSensor.", + "properties": { + "illuminance": "Returns the current light level in [lux](https://en.wikipedia.org/wiki/Lux) of the ambient light level around the hosting device." + } + }, + "AesCtrParams": { + "docs": "The **`AesCtrParams`** dictionary of the Web Crypto API represents the object that should be passed as the `algorithm` parameter into `SubtleCrypto.encrypt()`, `SubtleCrypto.decrypt()`, `SubtleCrypto.wrapKey()`, or `SubtleCrypto.unwrapKey()`, when using the AES-CTR algorithm.\n\nAES is a block cipher, meaning that it splits the message into blocks and encrypts it a block at a time. In CTR mode, every time a block of the message is encrypted, an extra block of data is mixed in. This extra block is called the \"counter block\".\n\nA given counter block value must never be used more than once with the same key:\n\n- Given a message n blocks long, a different counter block must be used for every block.\n- If the same key is used to encrypt more than one message, a different counter block must be used for all blocks across all messages.\n\nTypically this is achieved by splitting the initial counter block value into two concatenated parts:\n\n- A [nonce](https://en.wikipedia.org/wiki/Cryptographic_nonce) (that is, a number that may only be used once). The nonce part of the block stays the same for every block in the message. 
Each time a new message is to be encrypted, a new nonce is chosen. Nonces don't have to be secret, but they must not be reused with the same key.\n- A counter. This part of the block gets incremented each time a block is encrypted.\n\nEssentially: the nonce should ensure that counter blocks are not reused from one message to the next, while the counter should ensure that counter blocks are not reused within a single message.\n\n**Note:** See [Appendix B of the NIST SP800-38A standard](https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf#%5B%7B%22num%22%3A70%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22Fit%22%7D%5D) for more information.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AesCtrParams." + }, + "AbortSignal": { + "docs": "The **`AbortSignal`** interface represents a signal object that allows you to communicate with a DOM request (such as a fetch request) and abort it if required via an `AbortController` object.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal.", + "properties": { + "abort": "Returns an **`AbortSignal`** instance that is already set as aborted.", + "aborted": "A Boolean that indicates whether the request(s) the signal is communicating with is/are aborted (`true`) or not (`false`).", + "any": "Returns an **`AbortSignal`** that aborts when any of the given abort signals abort.", + "reason": "A JavaScript value providing the abort reason, once the signal has aborted.", + "throwifaborted": "Throws the signal's abort `reason` if the signal has been aborted; otherwise it does nothing.", + "timeout": "Returns an **`AbortSignal`** instance that will automatically abort after a specified time." 
+ } + }, + "AbsoluteOrientationSensor": { + "docs": "The **`AbsoluteOrientationSensor`** interface of the Sensor APIs describes the device's physical orientation in relation to the Earth's reference coordinate system.\n\nTo use this sensor, the user must grant permission to the `'accelerometer'`, `'gyroscope'`, and `'magnetometer'` device sensors through the Permissions API.\n\nThis feature may be blocked by a Permissions Policy set on your server.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AbsoluteOrientationSensor." + }, + "AnimationEffect": { + "docs": "The `AnimationEffect` interface of the Web Animations API is an interface representing animation effects.\n\n`AnimationEffect` is an abstract interface and so isn't directly instantiable. However, concrete interfaces such as `KeyframeEffect` inherit from it, and instances of these interfaces can be passed to `Animation` objects for playing, and may also be used by CSS Animations and Transitions.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AnimationEffect.", + "properties": { + "getcomputedtiming": "Returns the calculated timing properties for this `AnimationEffect`.", + "gettiming": "Returns the object associated with the animation containing all the animation's timing values.", + "updatetiming": "Updates the specified timing properties of this `AnimationEffect`." + } + }, + "AnimationPlaybackEvent": { + "docs": "The AnimationPlaybackEvent interface of the Web Animations API represents animation events.\n\nAs animations play, they report changes to their `playState` through animation events.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AnimationPlaybackEvent.", + "properties": { + "currenttime": "The current time of the animation that generated the event.", + "timelinetime": "The time value of the timeline of the animation that generated the event." 
+ } + }, + "AnimationEvent": { + "docs": "The **`AnimationEvent`** interface represents events providing information related to animations.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AnimationEvent.", + "properties": { + "animationname": "A string containing the value of the `animation-name` that generated the animation.", + "elapsedtime": "A `float` giving the amount of time the animation has been running, in seconds, when this event fired, excluding any time the animation was paused. For an `animationstart` event, `elapsedTime` is `0.0` unless there was a negative value for `animation-delay`, in which case the event will be fired with `elapsedTime` containing `(-1 * delay)`.", + "pseudoelement": "A string, starting with `'::'`, containing the name of the pseudo-element the animation runs on. If the animation doesn't run on a pseudo-element but on the element, an empty string: `''`." + } + }, + "AesKeyGenParams": { + "docs": "The **`AesKeyGenParams`** dictionary of the Web Crypto API represents the object that should be passed as the `algorithm` parameter into `SubtleCrypto.generateKey()`, when generating an AES key: that is, when the algorithm is identified as any of AES-CBC, AES-CTR, AES-GCM, or AES-KW.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AesKeyGenParams." + }, + "AnalyserNode": { + "docs": "The **`AnalyserNode`** interface represents a node able to provide real-time frequency and time-domain analysis information. It is an `AudioNode` that passes the audio stream unchanged from the input to the output, but allows you to take the generated data, process it, and create audio visualizations.\n\nAn `AnalyserNode` has exactly one input and one output. 
The node works even if the output is not connected.\n\n\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode.", + "properties": { + "fftsize": "An unsigned long value representing the size of the FFT ([Fast Fourier Transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform)) to be used to determine the frequency domain.", + "frequencybincount": "An unsigned long value half that of the FFT size. This generally equates to the number of data values you will have to play with for the visualization.", + "getbytefrequencydata": "Copies the current frequency data into a `Uint8Array` (unsigned byte array) passed into it.", + "getbytetimedomaindata": "Copies the current waveform, or time-domain, data into a `Uint8Array` (unsigned byte array) passed into it.", + "getfloatfrequencydata": "Copies the current frequency data into a `Float32Array` array passed into it.", + "getfloattimedomaindata": "Copies the current waveform, or time-domain, data into a `Float32Array` array passed into it.", + "maxdecibels": "A double value representing the maximum power value in the scaling range for the FFT analysis data, for conversion to unsigned byte values — basically, this specifies the maximum value for the range of results when using `getByteFrequencyData()`.", + "mindecibels": "A double value representing the minimum power value in the scaling range for the FFT analysis data, for conversion to unsigned byte values — basically, this specifies the minimum value for the range of results when using `getByteFrequencyData()`.", + "smoothingtimeconstant": "A double value representing the averaging constant with the last analysis frame — basically, it makes the transition between values over time smoother." 
+ } + }, + "AudioContext": { + "docs": "The `AudioContext` interface represents an audio-processing graph built from audio modules linked together, each represented by an `AudioNode`.\n\nAn audio context controls both the creation of the nodes it contains and the execution of the audio processing, or decoding. You need to create an `AudioContext` before you do anything else, as everything happens inside a context. It's recommended to create one AudioContext and reuse it instead of initializing a new one each time, and it's OK to use a single `AudioContext` for several different audio sources and pipeline concurrently.\n\nSee also https://developer.mozilla.org/en-US/docs/Web/API/AudioContext.", + "properties": { + "baselatency": "Returns the number of seconds of processing latency incurred by the `AudioContext` passing the audio from the `AudioDestinationNode` to the audio subsystem.", + "close": "Closes the audio context, releasing any system audio resources that it uses.", + "createmediaelementsource": "Creates a `MediaElementAudioSourceNode` associated with an `HTMLMediaElement`. This can be used to play and manipulate audio from `