|
| 1 | +/*--------------------------------------------------------------------------------------------- |
| 2 | + * Copyright (c) Microsoft Corporation. All rights reserved. |
| 3 | + * Licensed under the MIT License. See License.txt in the project root for license information. |
| 4 | + *--------------------------------------------------------------------------------------------*/ |
| 5 | + |
/**
 * UTF-16 code units the JSON cleaner needs to recognize.
 * Within each group the members are listed in ascending code-point order.
 */
const enum CharCode {
    // Punctuation that is significant to the scanner.
    doubleQuote = 0x22, // "
    asterisk = 0x2a, // *
    comma = 0x2c, // ,
    slash = 0x2f, // /
    backSlash = 0x5c, // \
    closeBracket = 0x5d, // ]
    closeBrace = 0x7d, // }

    // U+FEFF, the byte-order mark.
    byteOrderMark = 0xfeff,

    // Line terminators (see https://en.wikipedia.org/wiki/Newline#Unicode).
    lineFeed = 0x0a,
    verticalTab = 0x0b,
    formFeed = 0x0c,
    carriageReturn = 0x0d,
    nextLine = 0x85,
    lineSeparator = 0x2028,
    paragraphSeparator = 0x2029,

    // Horizontal whitespace (see https://en.wikipedia.org/wiki/Whitespace_character#Unicode).
    tab = 0x09,
    space = 0x20,
    nonBreakingSpace = 0xa0,
    ogham = 0x1680,
    // enQuad through zeroWidthSpace form one contiguous range, U+2000..U+200B.
    enQuad = 0x2000,
    emQuad = 0x2001,
    enSpace = 0x2002,
    emSpace = 0x2003,
    threePerEmSpace = 0x2004,
    fourPerEmSpace = 0x2005,
    sixPerEmSpace = 0x2006,
    figureSpace = 0x2007,
    punctuationSpace = 0x2008,
    thinSpace = 0x2009,
    hairSpace = 0x200a,
    zeroWidthSpace = 0x200b,
    narrowNoBreakSpace = 0x202f,
    mathematicalSpace = 0x205f,
    ideographicSpace = 0x3000,
}
| 47 | + |
/**
 * Tells whether `code` is one of the code units this module treats as ending a line:
 * LF, CR, VT, FF, U+2028 (line separator) or U+2029 (paragraph separator).
 * U+0085 (next line) is not in this set.
 */
function isLineBreak(code: number) {
    switch (code) {
        case CharCode.lineFeed:
        case CharCode.carriageReturn:
        case CharCode.verticalTab:
        case CharCode.formFeed:
        case CharCode.lineSeparator:
        case CharCode.paragraphSeparator:
            return true;

        default:
            return false;
    }
}
| 56 | + |
/**
 * Tells whether `code` counts as whitespace for this module: ASCII blanks,
 * every line-break code unit, the Unicode space separators listed in `CharCode`,
 * and the byte-order mark.
 */
function isWhitespace(code: number) {
    // U+2000 (en quad) through U+200B (zero-width space) is one contiguous block.
    if (code >= CharCode.enQuad && code <= CharCode.zeroWidthSpace) {
        return true;
    }

    switch (code) {
        case CharCode.tab:
        case CharCode.lineFeed:
        case CharCode.verticalTab:
        case CharCode.formFeed:
        case CharCode.carriageReturn:
        case CharCode.space:
        case CharCode.nextLine:
        case CharCode.nonBreakingSpace:
        case CharCode.ogham:
        case CharCode.lineSeparator:
        case CharCode.paragraphSeparator:
        case CharCode.narrowNoBreakSpace:
        case CharCode.mathematicalSpace:
        case CharCode.ideographicSpace:
        case CharCode.byteOrderMark:
            return true;

        default:
            return false;
    }
}
| 75 | + |
/**
 * Removes the constructs that make otherwise well-formed JSON text unparseable
 * by `JSON.parse`:
 *
 * - byte-order marks (U+FEFF) outside of strings,
 * - single-line (`// ...`) and multi-line comments,
 * - trailing commas, i.e. a comma whose next significant character is `}` or `]`
 *   (whitespace and comments in between are ignored).
 *
 * String literals are skipped as a whole, so comment markers and commas inside
 * them are preserved. The text is not validated; malformed input is passed
 * through for `JSON.parse` to reject.
 *
 * @param text The JSON-like text to clean.
 * @returns The text with the constructs listed above removed.
 */
function cleanJsonText(text: string) {
    const length = text.length;

    // Slices of `text` that survive the cleaning, in source order.
    const parts: string[] = [];
    // Start of the slice that is currently being accumulated.
    let partStart = 0;
    let index = 0;

    /** Returns the code unit at `pos`, or `undefined` when `pos` is past the end of the text. */
    function codeAt(pos: number): number | undefined {
        return pos < length ? text.charCodeAt(pos) : undefined;
    }

    /** Keeps `text[partStart, end)` and starts the next slice at `resume`, dropping what lies between. */
    function dropRange(end: number, resume: number): void {
        parts.push(text.substring(partStart, end));
        partStart = resume;
    }

    /**
     * Skips a string literal. `pos` is the position just after the opening quote;
     * the result is the position just after the closing quote, or after the line
     * break / end of text that cut the string short.
     */
    function skipString(pos: number): number {
        while (pos < length) {
            const code = text.charCodeAt(pos++);

            if (code === CharCode.doubleQuote || isLineBreak(code)) {
                // Either the regular end of the string, or it ended unexpectedly.
                break;
            }

            if (code === CharCode.backSlash) {
                // Skip the escaped character. The escape sequence is not verified;
                // this only prevents an escaped double-quote from ending the string.
                pos++;
            }
        }

        // A backslash as the very last character would otherwise step past the end.
        return Math.min(pos, length);
    }

    /** Skips a `//` comment. `pos` is just after the second slash; the result is the position of the terminating line break (or the end of text). */
    function skipLineComment(pos: number): number {
        while (pos < length && !isLineBreak(text.charCodeAt(pos))) {
            pos++;
        }

        return pos;
    }

    /** Skips a multi-line comment. `pos` is just after the opening asterisk; the result is the position after the closing marker (or the end of text if unterminated). */
    function skipBlockComment(pos: number): number {
        while (pos < length) {
            if (text.charCodeAt(pos) === CharCode.asterisk && codeAt(pos + 1) === CharCode.slash) {
                return pos + 2;
            }

            pos++;
        }

        return pos;
    }

    /**
     * Returns the first code unit at or after `pos` that is neither whitespace nor
     * part of a comment, or `undefined` if the text ends first. Does not move `index`.
     */
    function peekPastTrivia(pos: number): number | undefined {
        while (pos < length) {
            const code = text.charCodeAt(pos);

            if (isWhitespace(code)) {
                pos++;
                continue;
            }

            if (code === CharCode.slash) {
                const following = codeAt(pos + 1);

                if (following === CharCode.slash) {
                    pos = skipLineComment(pos + 2);
                    continue;
                }

                if (following === CharCode.asterisk) {
                    pos = skipBlockComment(pos + 2);
                    continue;
                }
            }

            return code;
        }

        return undefined;
    }

    while (index < length) {
        const start = index;
        const code = text.charCodeAt(index++);

        switch (code) {
            case CharCode.byteOrderMark:
                // Drop the byte-order mark itself and carry on with whatever follows.
                dropRange(start, index);
                break;

            case CharCode.doubleQuote:
                index = skipString(index);
                break;

            case CharCode.slash: {
                const following = codeAt(index);

                if (following === CharCode.slash) {
                    // The terminating line break is kept in the output.
                    index = skipLineComment(index + 1);
                    dropRange(start, index);
                }
                else if (following === CharCode.asterisk) {
                    index = skipBlockComment(index + 1);
                    dropRange(start, index);
                }

                break;
            }

            case CharCode.comma: {
                // Ignore trailing commas in object member lists and array element lists.
                const following = peekPastTrivia(index);

                if (following === CharCode.closeBrace || following === CharCode.closeBracket) {
                    dropRange(start, index);
                }

                break;
            }
        }
    }

    // Whatever follows the last dropped range is kept verbatim.
    parts.push(text.substring(partStart, length));

    return parts.join('');
}
| 213 | + |
/**
 * Parses JSON text leniently: the text is first passed through `cleanJsonText`
 * (which strips byte-order marks, comments and trailing commas) and the result
 * is handed to `JSON.parse`.
 *
 * @param text The JSON-like text to parse.
 * @returns The value produced by `JSON.parse` for the cleaned text.
 * @throws SyntaxError if the cleaned text is still not valid JSON.
 */
export function tolerantParse(text: string) {
    const cleaned = cleanJsonText(text);

    return JSON.parse(cleaned);
}
0 commit comments