From 06e40304b21a29420b40c3f2b8d4edeeaf96aa1f Mon Sep 17 00:00:00 2001 From: Nathan Sashihara <21227491+n8sh@users.noreply.github.com> Date: Tue, 6 Feb 2018 08:29:17 -0500 Subject: [PATCH] Fix Issue 18384 - std.net.isemail is slow to import due to regex Solution is to remove regex from std.net.isemail. May be worth revisiting if std.regex compile times improve. --- std/net/isemail.d | 109 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/std/net/isemail.d b/std/net/isemail.d index 1af92f3d39b..fa0ff7bc184 100644 --- a/std/net/isemail.d +++ b/std/net/isemail.d @@ -27,7 +27,6 @@ module std.net.isemail; // FIXME import std.range.primitives; // : ElementType; -import std.regex; import std.traits; import std.typecons : Flag, Yes, No; @@ -74,10 +73,6 @@ if (isSomeChar!(Char)) alias tstring = const(Char)[]; alias Token = TokenImpl!(Char); - static ipRegex = ctRegex!(`\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}`~ - `(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`.to!(const(Char)[])); - static fourChars = ctRegex!(`^[0-9A-Fa-f]{0,4}$`.to!(const(Char)[])); - enum defaultThreshold = 16; int threshold; bool diagnose; @@ -399,12 +394,11 @@ if (isSomeChar!(Char)) auto maxGroups = 8; size_t index = -1; auto addressLiteral = parseData[EmailPart.componentLiteral]; - auto matchesIp = addressLiteral.matchAll(ipRegex).map!(a => a.hit).array; + const(Char)[] ipSuffix = matchIPSuffix(addressLiteral); - if (!matchesIp.empty) + if (ipSuffix.length) { - index = addressLiteral.lastIndexOf(matchesIp.front); - + index = addressLiteral.length - ipSuffix.length; if (index != 0) addressLiteral = addressLiteral[0 .. index] ~ "0:0"; } @@ -418,7 +412,7 @@ if (isSomeChar!(Char)) else { auto ipV6 = addressLiteral[5 .. $]; - matchesIp = ipV6.split(Token.colon); + auto matchesIp = ipV6.split(Token.colon); immutable groupCount = matchesIp.length; index = ipV6.indexOf(Token.doubleColon); @@ -453,7 +447,7 @@ if (isSomeChar!(Char)) returnStatus ~= EmailStatusCode.rfc5322IpV6ColonEnd; else if (!matchesIp - .filter!(a => a.matchFirst(fourChars).empty) + .filter!(a => !isUpToFourHexChars(a)) .empty) returnStatus ~= EmailStatusCode.rfc5322IpV6BadChar; @@ -1863,3 +1857,96 @@ const(T)[] get (T) (const(T)[] str, size_t index, dchar c) assert("abc".get(1, 'b') == "b"); assert("löv".get(1, 'ö') == "ö"); } + +/+ +Replacement for: +--- +static fourChars = ctRegex!(`^[0-9A-Fa-f]{0,4}$`.to!(const(Char)[])); +... +a => a.matchFirst(fourChars).empty +--- ++/ +private bool isUpToFourHexChars(Char)(scope const(Char)[] s) +{ + import std.ascii : isHexDigit; + if (s.length > 4) return false; + foreach (c; s) + if (!isHexDigit(c)) return false; + return true; +} + +version(StdUnittest) @nogc nothrow pure @safe unittest +{ + assert(!isUpToFourHexChars("12345")); + assert(!isUpToFourHexChars("defg")); + assert(isUpToFourHexChars("1A0a")); +} + +/+ +Replacement for: +--- +static ipRegex = ctRegex!(`\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}`~ + `(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`.to!(const(Char)[])); +... +auto matchesIp = addressLiteral.matchAll(ipRegex).map!(a => a.hit).array; +---- +Note that only the first item of "matchAll" was ever used in practice +so we can return `const(Char)[]` instead of `const(Char)[][]` using a +zero-length string to indicate no match. ++/ +private const(Char)[] matchIPSuffix(Char)(return const(Char)[] s) @nogc nothrow pure @safe +{ + size_t end = s.length; + if (end < 7) return null; + // Check the first three `[.]\d{1,3}` + foreach (_; 0 .. 3) + { + size_t start = void; + if (end >= 2 && s[end-2] == '.') + start = end - 2; + else if (end >= 3 && s[end-3] == '.') + start = end - 3; + else if (end >= 4 && s[end-4] == '.') + start = end - 4; + else + return null; + uint x = 0; + foreach (i; start + 1 .. end) + { + uint c = cast(uint) s[i] - '0'; + if (c > 9) return null; + x = x * 10 + c; + } + if (x > 255) return null; + end = start; + } + // Check the final `\d{1,3}`. + if (end < 1) return null; + size_t start = end - 1; + uint x = cast(uint) s[start] - '0'; + if (x > 9) return null; + if (start > 0 && cast(uint) s[start-1] - '0' <= 9) + { + --start; + x += 10 * (cast(uint) s[start] - '0'); + if (start > 0 && cast(uint) s[start-1] - '0' <= 9) + { + --start; + x += 100 * (cast(uint) s[start] - '0'); + } + } + if (x > 255) return null; + // Must either be at start of string or preceded by a non-word character. + // (TO DETERMINE: is the definition of "word character" ASCII only?) + if (start == 0) return s; + const b = s[start - 1]; + import std.ascii : isAlphaNum; + if (isAlphaNum(b) || b == '_') return null; + return s[start .. $]; +} + +version(StdUnittest) @nogc nothrow pure @safe unittest +{ + assert(matchIPSuffix("255.255.255.255") == "255.255.255.255"); + assert(matchIPSuffix("babaev 176.16.0.1") == "176.16.0.1"); +}