diff --git a/package-lock.json b/package-lock.json index faf603c..fb363be 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "fs-extra": "^10.0.1", + "iconv-lite": "^0.6.3", "long": "^4.0.0", "uuid": "^8.3.0" }, @@ -2300,6 +2301,17 @@ "node": ">=10.17.0" } }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/ignore": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.0.tgz", @@ -3579,6 +3591,11 @@ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, "node_modules/schema-utils": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz", @@ -6190,6 +6207,14 @@ "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", "dev": true }, + "iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + } + }, "ignore": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.0.tgz", @@ -7143,6 +7168,11 @@ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "dev": true }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, "schema-utils": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz", diff --git a/package.json b/package.json index 44c01a1..9099254 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,8 @@ "dependencies": { "fs-extra": "^10.0.1", "long": "^4.0.0", - "uuid": "^8.3.0" + "uuid": "^8.3.0", + "iconv-lite": "^0.6.3" }, "devDependencies": { "@types/chai": "^4.2.22", diff --git a/src/constants.ts b/src/constants.ts index b6fc04a..cd2118b 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -10,6 +10,7 @@ export enum HeaderName { export enum MediaType { multipartForm = 'multipart/form-data', + multipartPrefix = 'multipart/', urlEncodedForm = 'application/x-www-form-urlencoded', octetStream = 'application/octet-stream', json = 'application/json', diff --git a/src/converters/RpcConverters.ts b/src/converters/RpcConverters.ts index 80b7ff7..a0d279f 100644 --- a/src/converters/RpcConverters.ts +++ b/src/converters/RpcConverters.ts @@ -19,7 +19,7 @@ import { InternalException } from '../utils/InternalException'; * @param typedData ITypedData object containing one of a string, json, or bytes property * @param convertStringToJson Optionally parse the string input type as JSON */ -export function fromTypedData(typedData?: RpcTypedData, convertStringToJson = true) { +export function fromTypedData(typedData?: RpcTypedData | null, convertStringToJson = true) { typedData = typedData || {}; let str = typedData.string || typedData.json; if (str !== undefined) { diff --git a/src/converters/RpcHttpConverters.ts b/src/converters/RpcHttpConverters.ts index 8c0ea8c..27b3354 100644 --- a/src/converters/RpcHttpConverters.ts +++ b/src/converters/RpcHttpConverters.ts @@ -20,7 +20,7 @@ import { * This is to avoid breaking changes in v2. * @param body The body from the RPC layer. */ -export function fromRpcHttpBody(body: RpcTypedData) { +export function fromRpcHttpBody(body: RpcTypedData | null | undefined) { if (body && body.bytes) { return (body.bytes).toString(); } else { diff --git a/src/http/Request.ts b/src/http/Request.ts index 1cba4d1..975cc70 100644 --- a/src/http/Request.ts +++ b/src/http/Request.ts @@ -11,7 +11,8 @@ import { HttpRequestUser, } from '@azure/functions'; import { RpcHttpData, RpcTypedData } from '@azure/functions-core'; -import { HeaderName } from '../constants'; +import { decode } from 'iconv-lite'; +import { HeaderName, MediaType } from '../constants'; import { fromTypedData } from '../converters/RpcConverters'; import { fromNullableMapping, fromRpcHttpBody } from '../converters/RpcHttpConverters'; import { parseForm } from '../parsers/parseForm'; @@ -26,6 +27,7 @@ export class Request implements HttpRequest { params: HttpRequestParams; body?: any; rawBody?: any; + bufferBody?: Buffer; #cachedUser?: HttpRequestUser | null; @@ -36,8 +38,31 @@ export class Request implements HttpRequest { this.headers = fromNullableMapping(rpcHttp.nullableHeaders, rpcHttp.headers); this.query = fromNullableMapping(rpcHttp.nullableQuery, rpcHttp.query); this.params = fromNullableMapping(rpcHttp.nullableParams, rpcHttp.params); - this.body = fromTypedData(rpcHttp.body); - this.rawBody = fromRpcHttpBody(rpcHttp.body); + + if (rpcHttp.body?.bytes) { + this.bufferBody = Buffer.from(rpcHttp.body.bytes); + // We turned on the worker capability to always receive bytes instead of a string (RawHttpBodyBytes) so that we could introduce the `bufferBody` property + // However, we need to replicate the old host behavior for the `body` and `rawBody` properties so that we don't break anyone + // https://github.com/Azure/azure-functions-nodejs-worker/issues/294 + // NOTE: The tests for this are in the e2e test folder of the worker. This is so we can test the full .net host behavior of encoding/parsing/etc. + // https://github.com/Azure/azure-functions-nodejs-worker/blob/b109082f9b85b42af1de00db4192483460214d81/test/end-to-end/Azure.Functions.NodejsWorker.E2E/Azure.Functions.NodejsWorker.E2E/HttpEndToEndTests.cs + + const contentType = this.get(HeaderName.contentType)?.toLowerCase(); + let legacyBody: RpcTypedData | undefined | null; + if (contentType === MediaType.octetStream || contentType?.startsWith(MediaType.multipartPrefix)) { + // If the content type was octet or multipart, the host would leave the body as bytes + // https://github.com/Azure/azure-functions-host/blob/9ac904e34b744d95a6f746921556235d4b2b3f0f/src/WebJobs.Script.Grpc/MessageExtensions/GrpcMessageConversionExtensions.cs#L233 + legacyBody = rpcHttp.body; + } else { + // Otherwise the host would decode the buffer to a string + legacyBody = { + string: decodeBuffer(this.bufferBody), + }; + } + + this.body = fromTypedData(legacyBody); + this.rawBody = fromRpcHttpBody(legacyBody); + } } get user(): HttpRequestUser | null { @@ -61,3 +86,39 @@ export class Request implements HttpRequest { } } } + +/** + * The host used utf8 by default, but supported `detectEncodingFromByteOrderMarks` so we have to replicate that + * Host code: https://github.com/Azure/azure-webjobs-sdk-extensions/blob/03cb2ce82db74ed5a2f3299e8a84a6c35835c269/src/WebJobs.Extensions.Http/Extensions/HttpRequestExtensions.cs#L27 + * .NET code: https://github.com/dotnet/runtime/blob/e55c908229e36f99a52745d4ee85316a0e8bb6a2/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs#L469 + * .NET description of encoding preambles: https://docs.microsoft.com/en-us/dotnet/api/system.text.encoding.getpreamble?view=net-6.0#remarks + **/ +function decodeBuffer(buffer: Buffer): string | undefined { + let encoding = 'utf8'; + if (buffer[0] === 0xfe && buffer[1] === 0xff) { + encoding = 'utf16be'; // The same as `Encoding.BigEndianUnicode` in .NET + buffer = compressBuffer(buffer, 2); + } else if (buffer[0] === 0xff && buffer[1] === 0xfe) { + if (buffer[2] !== 0 || buffer[3] !== 0) { + encoding = 'utf16le'; // The same as `Encoding.Unicode` in .NET + buffer = compressBuffer(buffer, 2); + } else { + encoding = 'utf32le'; + buffer = compressBuffer(buffer, 4); + } + } else if (buffer[0] === 0xef && buffer[1] === 0xbb && buffer[2] === 0xbf) { + encoding = 'utf8'; + buffer = compressBuffer(buffer, 3); + } else if (buffer[0] === 0 && buffer[1] === 0 && buffer[2] === 0xfe && buffer[3] === 0xff) { + encoding = 'utf32be'; + buffer = compressBuffer(buffer, 4); + } + + // NOTE: Node.js doesn't support all the above encodings by default, so we have to use "iconv-lite" to help + // Here are the iconv-lite supported encodings: https://github.com/ashtuchkin/iconv-lite/wiki/Supported-Encodings + return decode(buffer, encoding); +} + +function compressBuffer(buffer: Buffer, n: number): Buffer { + return buffer.subarray(n); +} diff --git a/types/http.d.ts b/types/http.d.ts index 14750b4..643669f 100644 --- a/types/http.d.ts +++ b/types/http.d.ts @@ -91,13 +91,23 @@ export interface HttpRequest { user: HttpRequestUser | null; /** * The HTTP request body. + * If the media type is 'application/octet-stream' or 'multipart/*', this will be a Buffer + * If the value is a JSON parse-able string, this will be the parsed object + * Otherwise, this will be a string */ body?: any; + /** - * The HTTP request body as a UTF-8 string. + * The HTTP request body as a UTF-8 string. In this case, the name "raw" is used because the string will never be parsed to an object even if it is json. + * Improvements to the naming are tracked in https://github.com/Azure/azure-functions-nodejs-worker/issues/294 */ rawBody?: any; + /** + * The raw Buffer representing the body before any decoding or parsing has been done + */ + bufferBody?: Buffer; + /** * Parses the body and returns an object representing a form * @throws if the content type is not "multipart/form-data" or "application/x-www-form-urlencoded"