Skip to content

Commit f7396aa

Browse files
dsnet authored and gopherbot committed
time: optimize Parse for RFC3339 and RFC3339Nano
RFC 3339 is the most common time representation, being used in an overwhelming 57.3% of all specified formats, while the next competitor only holds 7.5% usage. Specially optimize parsing to handle the RFC 3339 format.

To reduce the complexity of error checking, parseRFC3339 simply returns a bool indicating parsing success. It leaves error handling to the general parse path.

To assist in fuzzing, the internal parse function was left unmodified so that we could test that parseRFC3339 and parse agree with each other.

Performance:

    name             old time/op  new time/op  delta
    ParseRFC3339UTC  112ns ± 1%   37ns ± 1%    -67.37%  (p=0.000 n=9+9)
    ParseRFC3339TZ   259ns ± 2%   67ns ± 1%    -73.92%  (p=0.000 n=10+9)

Credit goes to Amarjeet Anand for a prior CL attempting to optimize this. See CL 425014.

Fixes #54093

Change-Id: I14f4e8c52b092d44ceef6863f261842ed7e83f4c
Reviewed-on: https://go-review.googlesource.com/c/go/+/425197
Reviewed-by: Rob Pike <[email protected]>
Run-TryBot: Joseph Tsai <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Auto-Submit: Joseph Tsai <[email protected]>
Reviewed-by: Jenny Rakoczy <[email protected]>
1 parent 7ffbcd1 commit f7396aa

File tree

4 files changed

+189
-31
lines changed

4 files changed

+189
-31
lines changed

src/time/export_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,3 +135,5 @@ var Quote = quote
135135

136136
var AppendFormatAny = Time.appendFormat
137137
var AppendFormatRFC3339 = Time.appendFormatRFC3339
138+
var ParseAny = parse
139+
var ParseRFC3339 = parseRFC3339

src/time/format.go

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -618,6 +618,7 @@ func (t Time) Format(layout string) string {
618618
// AppendFormat is like Format but appends the textual
619619
// representation to b and returns the extended buffer.
620620
func (t Time) AppendFormat(b []byte, layout string) []byte {
621+
// Optimize for RFC3339 as it accounts for over half of all representations.
621622
switch layout {
622623
case RFC3339:
623624
return t.appendFormatRFC3339(b, false)
@@ -1018,6 +1019,12 @@ func skip(value, prefix string) (string, error) {
10181019
// differ by the actual zone offset. To avoid such problems, prefer time layouts
10191020
// that use a numeric zone offset, or use ParseInLocation.
10201021
func Parse(layout, value string) (Time, error) {
1022+
// Optimize for RFC3339 as it accounts for over half of all representations.
1023+
if layout == RFC3339 || layout == RFC3339Nano {
1024+
if t, ok := parseRFC3339(value, Local); ok {
1025+
return t, nil
1026+
}
1027+
}
10211028
return parse(layout, value, UTC, Local)
10221029
}
10231030

@@ -1027,9 +1034,88 @@ func Parse(layout, value string) (Time, error) {
10271034
// Second, when given a zone offset or abbreviation, Parse tries to match it
10281035
// against the Local location; ParseInLocation uses the given location.
10291036
func ParseInLocation(layout, value string, loc *Location) (Time, error) {
1037+
// Optimize for RFC3339 as it accounts for over half of all representations.
1038+
if layout == RFC3339 || layout == RFC3339Nano {
1039+
if t, ok := parseRFC3339(value, loc); ok {
1040+
return t, nil
1041+
}
1042+
}
10301043
return parse(layout, value, loc, loc)
10311044
}
10321045

1046+
func parseRFC3339(s string, local *Location) (Time, bool) {
1047+
// parseUint parses s as an unsigned decimal integer and
1048+
// verifies that it is within some range.
1049+
// If it is invalid or out-of-range,
1050+
// it sets ok to false and returns the min value.
1051+
ok := true
1052+
parseUint := func(s string, min, max int) (x int) {
1053+
for _, c := range []byte(s) {
1054+
if c < '0' || '9' < c {
1055+
ok = false
1056+
return min
1057+
}
1058+
x = x*10 + int(c) - '0'
1059+
}
1060+
if x < min || max < x {
1061+
ok = false
1062+
return min
1063+
}
1064+
return x
1065+
}
1066+
1067+
// Parse the date and time.
1068+
if len(s) < len("2006-01-02T15:04:05") {
1069+
return Time{}, false
1070+
}
1071+
year := parseUint(s[0:4], 0, 9999) // e.g., 2006
1072+
month := parseUint(s[5:7], 1, 12) // e.g., 01
1073+
day := parseUint(s[8:10], 1, daysIn(Month(month), year)) // e.g., 02
1074+
hour := parseUint(s[11:13], 0, 23) // e.g., 15
1075+
min := parseUint(s[14:16], 0, 59) // e.g., 04
1076+
sec := parseUint(s[17:19], 0, 59) // e.g., 05
1077+
if !ok || !(s[4] == '-' && s[7] == '-' && s[10] == 'T' && s[13] == ':' && s[16] == ':') {
1078+
return Time{}, false
1079+
}
1080+
s = s[19:]
1081+
1082+
// Parse the fractional second.
1083+
var nsec int
1084+
if len(s) >= 2 && s[0] == '.' && isDigit(s, 1) {
1085+
n := 2
1086+
for ; n < len(s) && isDigit(s, n); n++ {
1087+
}
1088+
nsec, _, _ = parseNanoseconds(s, n)
1089+
s = s[n:]
1090+
}
1091+
1092+
// Parse the time zone.
1093+
t := Date(year, Month(month), day, hour, min, sec, nsec, UTC)
1094+
if s != "Z" {
1095+
if len(s) != len("-07:00") {
1096+
return Time{}, false
1097+
}
1098+
hr := parseUint(s[1:3], 0, 23) // e.g., 07
1099+
mm := parseUint(s[4:6], 0, 59) // e.g., 00
1100+
if !ok || !((s[0] == '-' || s[0] == '+') && s[3] == ':') {
1101+
return Time{}, false
1102+
}
1103+
zoneOffset := (hr*60 + mm) * 60
1104+
if s[0] == '-' {
1105+
zoneOffset *= -1
1106+
}
1107+
t.addSec(-int64(zoneOffset))
1108+
1109+
// Use local zone with the given offset if possible.
1110+
if _, offset, _, _, _ := local.lookup(t.unixSec()); offset == zoneOffset {
1111+
t.setLoc(local)
1112+
} else {
1113+
t.setLoc(FixedZone("", zoneOffset))
1114+
}
1115+
}
1116+
return t, true
1117+
}
1118+
10331119
func parse(layout, value string, defaultLocation, local *Location) (Time, error) {
10341120
alayout, avalue := layout, value
10351121
rangeErrString := "" // set if a value is out of range

src/time/format_test.go

Lines changed: 89 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -871,44 +871,44 @@ func TestFormatFractionalSecondSeparators(t *testing.T) {
871871
}
872872
}
873873

874+
var longFractionalDigitsTests = []struct {
875+
value string
876+
want int
877+
}{
878+
// 9 digits
879+
{"2021-09-29T16:04:33.000000000Z", 0},
880+
{"2021-09-29T16:04:33.000000001Z", 1},
881+
{"2021-09-29T16:04:33.100000000Z", 100_000_000},
882+
{"2021-09-29T16:04:33.100000001Z", 100_000_001},
883+
{"2021-09-29T16:04:33.999999999Z", 999_999_999},
884+
{"2021-09-29T16:04:33.012345678Z", 12_345_678},
885+
// 10 digits, truncates
886+
{"2021-09-29T16:04:33.0000000000Z", 0},
887+
{"2021-09-29T16:04:33.0000000001Z", 0},
888+
{"2021-09-29T16:04:33.1000000000Z", 100_000_000},
889+
{"2021-09-29T16:04:33.1000000009Z", 100_000_000},
890+
{"2021-09-29T16:04:33.9999999999Z", 999_999_999},
891+
{"2021-09-29T16:04:33.0123456789Z", 12_345_678},
892+
// 11 digits, truncates
893+
{"2021-09-29T16:04:33.10000000000Z", 100_000_000},
894+
{"2021-09-29T16:04:33.00123456789Z", 1_234_567},
895+
// 12 digits, truncates
896+
{"2021-09-29T16:04:33.000123456789Z", 123_456},
897+
// 15 digits, truncates
898+
{"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
899+
}
900+
874901
// Issue 48685 and 54567.
875902
func TestParseFractionalSecondsLongerThanNineDigits(t *testing.T) {
876-
tests := []struct {
877-
s string
878-
want int
879-
}{
880-
// 9 digits
881-
{"2021-09-29T16:04:33.000000000Z", 0},
882-
{"2021-09-29T16:04:33.000000001Z", 1},
883-
{"2021-09-29T16:04:33.100000000Z", 100_000_000},
884-
{"2021-09-29T16:04:33.100000001Z", 100_000_001},
885-
{"2021-09-29T16:04:33.999999999Z", 999_999_999},
886-
{"2021-09-29T16:04:33.012345678Z", 12_345_678},
887-
// 10 digits, truncates
888-
{"2021-09-29T16:04:33.0000000000Z", 0},
889-
{"2021-09-29T16:04:33.0000000001Z", 0},
890-
{"2021-09-29T16:04:33.1000000000Z", 100_000_000},
891-
{"2021-09-29T16:04:33.1000000009Z", 100_000_000},
892-
{"2021-09-29T16:04:33.9999999999Z", 999_999_999},
893-
{"2021-09-29T16:04:33.0123456789Z", 12_345_678},
894-
// 11 digits, truncates
895-
{"2021-09-29T16:04:33.10000000000Z", 100_000_000},
896-
{"2021-09-29T16:04:33.00123456789Z", 1_234_567},
897-
// 12 digits, truncates
898-
{"2021-09-29T16:04:33.000123456789Z", 123_456},
899-
// 15 digits, truncates
900-
{"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
901-
}
902-
903-
for _, tt := range tests {
903+
for _, tt := range longFractionalDigitsTests {
904904
for _, format := range []string{RFC3339, RFC3339Nano} {
905-
tm, err := Parse(format, tt.s)
905+
tm, err := Parse(format, tt.value)
906906
if err != nil {
907-
t.Errorf("Parse(%q, %q) error: %v", format, tt.s, err)
907+
t.Errorf("Parse(%q, %q) error: %v", format, tt.value, err)
908908
continue
909909
}
910910
if got := tm.Nanosecond(); got != tt.want {
911-
t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.s, got, tt.want)
911+
t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.value, got, tt.want)
912912
}
913913
}
914914
}
@@ -955,3 +955,61 @@ func FuzzFormatRFC3339(f *testing.F) {
955955
}
956956
})
957957
}
958+
959+
func FuzzParseRFC3339(f *testing.F) {
960+
for _, tt := range formatTests {
961+
f.Add(tt.result)
962+
}
963+
for _, tt := range parseTests {
964+
f.Add(tt.value)
965+
}
966+
for _, tt := range parseErrorTests {
967+
f.Add(tt.value)
968+
}
969+
for _, tt := range longFractionalDigitsTests {
970+
f.Add(tt.value)
971+
}
972+
973+
f.Fuzz(func(t *testing.T, s string) {
974+
// equalTime is like time.Time.Equal, but also compares the time zone.
975+
equalTime := func(t1, t2 Time) bool {
976+
name1, offset1 := t1.Zone()
977+
name2, offset2 := t2.Zone()
978+
return t1.Equal(t2) && name1 == name2 && offset1 == offset2
979+
}
980+
981+
for _, tz := range []*Location{UTC, Local} {
982+
// Parsing as RFC3339 or RFC3339Nano should be identical.
983+
t1, err1 := ParseAny(RFC3339, s, UTC, tz)
984+
t2, err2 := ParseAny(RFC3339Nano, s, UTC, tz)
985+
switch {
986+
case (err1 == nil) != (err2 == nil):
987+
t.Fatalf("ParseAny(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, err1, err2)
988+
case !equalTime(t1, t2):
989+
t.Fatalf("ParseAny(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, t1, t2)
990+
}
991+
992+
// TODO(https://go.dev/issue/54580):
993+
// Remove these checks after ParseAny rejects all invalid RFC 3339.
994+
if err1 == nil {
995+
num2 := func(s string) byte { return 10*(s[0]-'0') + (s[1] - '0') }
996+
switch {
997+
case len(s) > 12 && s[12] == ':':
998+
t.Skipf("ParseAny(%q) incorrectly allows single-digit hour fields", s)
999+
case len(s) > 19 && s[19] == ',':
1000+
t.Skipf("ParseAny(%q) incorrectly allows comma as sub-second separator", s)
1001+
case !strings.HasSuffix(s, "Z") && len(s) > 4 && (num2(s[len(s)-5:]) >= 24 || num2(s[len(s)-2:]) >= 60):
1002+
t.Skipf("ParseAny(%q) incorrectly allows out-of-range zone offset", s)
1003+
}
1004+
}
1005+
1006+
// Customized parser should be identical to general parser.
1007+
switch got, ok := ParseRFC3339(s, tz); {
1008+
case ok != (err1 == nil):
1009+
t.Fatalf("ParseRFC3339(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, ok, err1 == nil)
1010+
case !equalTime(got, t1):
1011+
t.Fatalf("ParseRFC3339(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, got, t2)
1012+
}
1013+
}
1014+
})
1015+
}

src/time/time_test.go

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1445,6 +1445,18 @@ func BenchmarkParse(b *testing.B) {
14451445
}
14461446
}
14471447

1448+
func BenchmarkParseRFC3339UTC(b *testing.B) {
1449+
for i := 0; i < b.N; i++ {
1450+
Parse(RFC3339, "2020-08-22T11:27:43.123456789Z")
1451+
}
1452+
}
1453+
1454+
func BenchmarkParseRFC3339TZ(b *testing.B) {
1455+
for i := 0; i < b.N; i++ {
1456+
Parse(RFC3339, "2020-08-22T11:27:43.123456789-02:00")
1457+
}
1458+
}
1459+
14481460
func BenchmarkParseDuration(b *testing.B) {
14491461
for i := 0; i < b.N; i++ {
14501462
ParseDuration("9007199254.740993ms")

0 commit comments

Comments
 (0)