Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 98 additions & 11 deletions std/net/isemail.d
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ module std.net.isemail;

// FIXME
import std.range.primitives; // : ElementType;
import std.regex;
import std.traits;
import std.typecons : Flag, Yes, No;

Expand Down Expand Up @@ -74,10 +73,6 @@ if (isSomeChar!(Char))
alias tstring = const(Char)[];
alias Token = TokenImpl!(Char);

static ipRegex = ctRegex!(`\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}`~
`(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`.to!(const(Char)[]));
static fourChars = ctRegex!(`^[0-9A-Fa-f]{0,4}$`.to!(const(Char)[]));

enum defaultThreshold = 16;
int threshold;
bool diagnose;
Expand Down Expand Up @@ -399,12 +394,11 @@ if (isSomeChar!(Char))
auto maxGroups = 8;
size_t index = -1;
auto addressLiteral = parseData[EmailPart.componentLiteral];
auto matchesIp = addressLiteral.matchAll(ipRegex).map!(a => a.hit).array;
const(Char)[] ipSuffix = matchIPSuffix(addressLiteral);

if (!matchesIp.empty)
if (ipSuffix.length)
{
index = addressLiteral.lastIndexOf(matchesIp.front);

index = addressLiteral.length - ipSuffix.length;
if (index != 0)
addressLiteral = addressLiteral[0 .. index] ~ "0:0";
}
Expand All @@ -418,7 +412,7 @@ if (isSomeChar!(Char))
else
{
auto ipV6 = addressLiteral[5 .. $];
matchesIp = ipV6.split(Token.colon);
auto matchesIp = ipV6.split(Token.colon);
immutable groupCount = matchesIp.length;
index = ipV6.indexOf(Token.doubleColon);

Expand Down Expand Up @@ -453,7 +447,7 @@ if (isSomeChar!(Char))
returnStatus ~= EmailStatusCode.rfc5322IpV6ColonEnd;

else if (!matchesIp
.filter!(a => a.matchFirst(fourChars).empty)
.filter!(a => !isUpToFourHexChars(a))
.empty)
returnStatus ~= EmailStatusCode.rfc5322IpV6BadChar;

Expand Down Expand Up @@ -1863,3 +1857,96 @@ const(T)[] get (T) (const(T)[] str, size_t index, dchar c)
assert("abc".get(1, 'b') == "b");
assert("löv".get(1, 'ö') == "ö");
}

/+
Replacement for:
---
static fourChars = ctRegex!(`^[0-9A-Fa-f]{0,4}$`.to!(const(Char)[]));
...
a => a.matchFirst(fourChars).empty
---
+/
private bool isUpToFourHexChars(Char)(scope const(Char)[] s)
{
import std.ascii : isHexDigit;
if (s.length > 4) return false;
foreach (c; s)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return s.length <= 4 && s.all!isHexDigit; But that would need std.algorithm.searching :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this would involve auto-decoding I think.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.byCodeUnit - no autodecoding and in @nogc nothrow pure @safe

https://run.dlang.io/is/PvbTfz

if (!isHexDigit(c)) return false;
return true;
}

version(StdUnittest) @nogc nothrow pure @safe unittest
{
assert(!isUpToFourHexChars("12345"));
assert(!isUpToFourHexChars("defg"));
assert(isUpToFourHexChars("1A0a"));
}

/+
Replacement for:
---
static ipRegex = ctRegex!(`\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}`~
`(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`.to!(const(Char)[]));
...
auto matchesIp = addressLiteral.matchAll(ipRegex).map!(a => a.hit).array;
----
Note that only the first item of "matchAll" was ever used in practice
so we can return `const(Char)[]` instead of `const(Char)[][]` using a
zero-length string to indicate no match.
+/
private const(Char)[] matchIPSuffix(Char)(return const(Char)[] s) @nogc nothrow pure @safe
{
size_t end = s.length;
if (end < 7) return null;
// Check the first three `[.]\d{1,3}`
foreach (_; 0 .. 3)
{
size_t start = void;
if (end >= 2 && s[end-2] == '.')
start = end - 2;
else if (end >= 3 && s[end-3] == '.')
start = end - 3;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is untested, try an IP address with 2 digits in an octet.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

else if (end >= 4 && s[end-4] == '.')
start = end - 4;
else
return null;
Copy link
Contributor

@wilzbach wilzbach Feb 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about:

start = end - s.byCodeUnit.retro.take(4).until(".").count;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do even one better:

int blocks = s.byCodeUnit
.splitter(".")
.filter!checkValid // check an individual block
.take(4) // avoids problems with infinite strings
.walkLength;
if (blocks < 4) return null;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think that does the right thing. The blocks need to be consecutive and they need to be the tail of the string, and we need the total length of the tail.

uint x = 0;
foreach (i; start + 1 .. end)
{
uint c = cast(uint) s[i] - '0';
if (c > 9) return null;
x = x * 10 + c;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to check for integer overflow

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't overflow, we're summing a maximum of 3 digits.

}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uint x = s[start + 1 .. end].byCodeUnit.take(2).to!uint.ifThrown(256);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not @nogc and not nothrow

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: @nogc isn't a requirement for Phobos (we prefer nice, readable code instead of mir). In this case it will be fixed once the already merged -dip1008 gets activated by default.
Also the previous matchAll isn't neither @nogc nor nothrow either (and neither is the code which uses it).

Regarding nothrow, how about scope(failure) return null;?
That would save even more lines.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While making things @nogc nothrow is nice when ever possible, in this specific instance isEmail is not @nogc nothrow, so this change will have no effect on the user and there's no benefit.

Copy link
Member

@schveiguy schveiguy Feb 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about converting to ubyte? then let to do the work of making sure the octet is in range.

FWIW, I like the idea of tagging internal functions @nogc and nothrow, even if the users aren't, because it helps keep GC usage down. Otherwise, little fixes that may add an unnecessary allocation creep in here and there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in this specific instance isEmail is not @nogc nothrow

It probably should be but I'll leave that for another pull request. It looks like isEmail is incorrectly using Exceptions rather than assert() to debug logic errors in the function itself (see forum discussion).

if (x > 255) return null;
end = start;
}
// Check the final `\d{1,3}`.
if (end < 1) return null;
size_t start = end - 1;
uint x = cast(uint) s[start] - '0';
if (x > 9) return null;
if (start > 0 && cast(uint) s[start-1] - '0' <= 9)
{
--start;
x += 10 * (cast(uint) s[start] - '0');
if (start > 0 && cast(uint) s[start-1] - '0' <= 9)
{
--start;
x += 100 * (cast(uint) s[start] - '0');
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s[max(0, end - 3) .. $].byCodeUnit.to!uint.ifThrown(256);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't use this here because unlike above I don't know where the encoded octet starts.

if (x > 255) return null;
// Must either be at start of string or preceded by a non-word character.
// (TO DETERMINE: is the definition of "word character" ASCII only?)
if (start == 0) return s;
const b = s[start - 1];
import std.ascii : isAlphaNum;
if (isAlphaNum(b) || b == '_') return null;
return s[start .. $];
}

version(StdUnittest) @nogc nothrow pure @safe unittest
{
assert(matchIPSuffix("255.255.255.255") == "255.255.255.255");
assert(matchIPSuffix("babaev 176.16.0.1") == "176.16.0.1");
}