5
5
#include " node_i18n.h"
6
6
#include " util-inl.h"
7
7
8
+ #include < algorithm>
8
9
#include < cmath>
9
10
#include < cstdio>
10
11
#include < numeric>
@@ -58,7 +59,7 @@ class URLHost {
58
59
public:
59
60
~URLHost ();
60
61
61
- void ParseIPv4Host (const char * input, size_t length, bool * is_ipv4 );
62
+ void ParseIPv4Host (const char * input, size_t length);
62
63
void ParseIPv6Host (const char * input, size_t length);
63
64
void ParseOpaqueHost (const char * input, size_t length);
64
65
void ParseHost (const char * input,
@@ -359,18 +360,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
359
360
type_ = HostType::H_IPV6;
360
361
}
361
362
362
- int64_t ParseNumber (const char * start, const char * end) {
363
+ // https://url.spec.whatwg.org/#ipv4-number-parser
364
+ int64_t ParseIPv4Number (const char * start, const char * end) {
365
+ if (end - start == 0 ) return -1 ;
366
+
363
367
unsigned R = 10 ;
364
368
if (end - start >= 2 && start[0 ] == ' 0' && (start[1 ] | 0x20 ) == ' x' ) {
365
369
start += 2 ;
366
370
R = 16 ;
367
- }
368
- if (end - start == 0 ) {
369
- return 0 ;
370
- } else if (R == 10 && end - start > 1 && start[0 ] == ' 0' ) {
371
+ } else if (end - start >= 2 && start[0 ] == ' 0' ) {
371
372
start++;
372
373
R = 8 ;
373
374
}
375
+
376
+ if (end - start == 0 ) return 0 ;
377
+
374
378
const char * p = start;
375
379
376
380
while (p < end) {
@@ -394,9 +398,34 @@ int64_t ParseNumber(const char* start, const char* end) {
394
398
return strtoll (start, nullptr , R);
395
399
}
396
400
397
- void URLHost::ParseIPv4Host (const char * input, size_t length, bool * is_ipv4) {
401
+ // https://url.spec.whatwg.org/#ends-in-a-number-checker
402
+ bool EndsInANumber (const std::string& input) {
403
+ std::vector<std::string> parts = SplitString (input, ' .' , false );
404
+
405
+ if (parts.empty ()) return false ;
406
+
407
+ if (parts.back ().empty ()) {
408
+ if (parts.size () == 1 ) return false ;
409
+ parts.pop_back ();
410
+ }
411
+
412
+ const std::string& last = parts.back ();
413
+
414
+ // If last is non-empty and contains only ASCII digits, then return true
415
+ if (!last.empty () &&
416
+ std::all_of (last.begin (), last.end (), ::isdigit)) {
417
+ return true ;
418
+ }
419
+
420
+ const char * last_str = last.c_str ();
421
+ int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
422
+ if (num >= 0 ) return true ;
423
+
424
+ return false ;
425
+ }
426
+
427
+ void URLHost::ParseIPv4Host (const char * input, size_t length) {
398
428
CHECK_EQ (type_, HostType::H_FAILED);
399
- *is_ipv4 = false ;
400
429
const char * pointer = input;
401
430
const char * mark = input;
402
431
const char * end = pointer + length;
@@ -414,7 +443,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
414
443
if (++parts > static_cast <int >(arraysize (numbers))) return ;
415
444
if (pointer == mark)
416
445
return ;
417
- int64_t n = ParseNumber (mark, pointer);
446
+ int64_t n = ParseIPv4Number (mark, pointer);
418
447
if (n < 0 )
419
448
return ;
420
449
@@ -429,7 +458,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
429
458
pointer++;
430
459
}
431
460
CHECK_GT (parts, 0 );
432
- *is_ipv4 = true ;
433
461
434
462
// If any but the last item in numbers is greater than 255, return failure.
435
463
// If the last item in numbers is greater than or equal to
@@ -501,11 +529,10 @@ void URLHost::ParseHost(const char* input,
501
529
}
502
530
}
503
531
504
- // Check to see if it's an IPv4 IP address
505
- bool is_ipv4;
506
- ParseIPv4Host (decoded.c_str (), decoded.length (), &is_ipv4);
507
- if (is_ipv4)
508
- return ;
532
+ // If domain ends in a number, then return the result of IPv4 parsing domain
533
+ if (EndsInANumber (decoded)) {
534
+ return ParseIPv4Host (decoded.c_str (), decoded.length ());
535
+ }
509
536
510
537
// If the unicode flag is set, run the result through punycode ToUnicode
511
538
if (unicode && !ToUnicode (decoded, &decoded))
0 commit comments