Skip to content

Commit 1f5b1b2

Browse files
committed
runtime: change osyield to use Windows SwitchToThread
It appears that Windows osyield is just a 15ms sleep on my computer (see benchmarks below). Replace NtWaitForSingleObject in osyield with SwitchToThread (as suggested by Dmitry).

Also add issue #14790 related benchmarks, so we can track performance changes in CL 20834 and CL 20835 and beyond.

Update #14790

benchmark                             old ns/op    new ns/op    delta
BenchmarkChanToSyscallPing1ms         1953200      1953000      -0.01%
BenchmarkChanToSyscallPing15ms        31562904     31248400     -1.00%
BenchmarkSyscallToSyscallPing1ms      5247         4202         -19.92%
BenchmarkSyscallToSyscallPing15ms     5260         4374         -16.84%
BenchmarkChanToChanPing1ms            474          494          +4.22%
BenchmarkChanToChanPing15ms           468          489          +4.49%
BenchmarkOsYield1ms                   980018       75.5         -99.99%
BenchmarkOsYield15ms                  15625200     75.8         -100.00%

Change-Id: I1b4cc7caca784e2548ee3c846ca07ef152ebedce
Reviewed-on: https://go-review.googlesource.com/21294
Run-TryBot: Alex Brainman <[email protected]>
Reviewed-by: Dmitry Vyukov <[email protected]>
Run-TryBot: Dmitry Vyukov <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 02adfa0 commit 1f5b1b2

File tree

5 files changed

+186
-11
lines changed

5 files changed

+186
-11
lines changed

src/runtime/export_windows_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package runtime
99
import "unsafe"
1010

1111
var TestingWER = &testingWER
12+
var OsYield = osyield
1213

1314
func NumberOfProcessors() int32 {
1415
var info systeminfo

src/runtime/os1_windows.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
//go:cgo_import_dynamic runtime._SetUnhandledExceptionFilter SetUnhandledExceptionFilter%1 "kernel32.dll"
4242
//go:cgo_import_dynamic runtime._SetWaitableTimer SetWaitableTimer%6 "kernel32.dll"
4343
//go:cgo_import_dynamic runtime._SuspendThread SuspendThread%1 "kernel32.dll"
44+
//go:cgo_import_dynamic runtime._SwitchToThread SwitchToThread%0 "kernel32.dll"
4445
//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc%4 "kernel32.dll"
4546
//go:cgo_import_dynamic runtime._VirtualFree VirtualFree%3 "kernel32.dll"
4647
//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult%5 "ws2_32.dll"
@@ -84,6 +85,7 @@ var (
8485
_SetUnhandledExceptionFilter,
8586
_SetWaitableTimer,
8687
_SuspendThread,
88+
_SwitchToThread,
8789
_VirtualAlloc,
8890
_VirtualFree,
8991
_WSAGetOverlappedResult,
@@ -189,6 +191,8 @@ var useLoadLibraryEx bool
189191

190192
func osinit() {
191193
asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall))
194+
usleep2Addr = unsafe.Pointer(funcPC(usleep2))
195+
switchtothreadAddr = unsafe.Pointer(funcPC(switchtothread))
192196

193197
setBadSignalMsg()
194198

@@ -586,17 +590,22 @@ func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
586590
}
587591

588592
// in sys_windows_386.s and sys_windows_amd64.s
589-
func usleep1(usec uint32)
593+
func onosstack(fn unsafe.Pointer, arg uint32)
594+
func usleep2(usec uint32)
595+
func switchtothread()
596+
597+
var usleep2Addr unsafe.Pointer
598+
var switchtothreadAddr unsafe.Pointer
590599

591600
//go:nosplit
592601
func osyield() {
593-
usleep1(1)
602+
onosstack(switchtothreadAddr, 0)
594603
}
595604

596605
//go:nosplit
597606
func usleep(us uint32) {
598607
// Have 1us units; want 100ns units.
599-
usleep1(10 * us)
608+
onosstack(usleep2Addr, 10*us)
600609
}
601610

602611
func ctrlhandler1(_type uint32) uint32 {

src/runtime/sys_windows_386.s

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,10 +358,11 @@ TEXT runtime·setldt(SB),NOSPLIT,$0
358358
MOVL CX, 0x14(FS)
359359
RET
360360

361-
// Sleep duration is in 100ns units.
362-
TEXT runtime·usleep1(SB),NOSPLIT,$0
363-
MOVL usec+0(FP), BX
364-
MOVL $runtime·usleep2(SB), AX // to hide from 8l
361+
// onosstack calls fn on OS stack.
362+
// func onosstack(fn unsafe.Pointer, arg uint32)
363+
TEXT runtime·onosstack(SB),NOSPLIT,$0
364+
MOVL fn+0(FP), AX // to hide from 8l
365+
MOVL arg+4(FP), BX
365366

366367
// Execute call on m->g0 stack, in case we are not actually
367368
// calling a system call wrapper, like when running under WINE.
@@ -423,6 +424,14 @@ TEXT runtime·usleep2(SB),NOSPLIT,$20
423424
MOVL BP, SP
424425
RET
425426

427+
// Runs on OS stack.
428+
// switchtothread calls the function whose address is stored in
// runtime·_SwitchToThread, passing no arguments.
TEXT runtime·switchtothread(SB),NOSPLIT,$0
429+
// Remember the incoming SP in BP so it can be restored after the call.
MOVL SP, BP
430+
// Load the imported SwitchToThread entry point and call it indirectly.
MOVL runtime·_SwitchToThread(SB), AX
431+
CALL AX
432+
// Put SP back to its value on entry.
MOVL BP, SP
433+
RET
434+
426435
// func now() (sec int64, nsec int32)
427436
TEXT time·now(SB),NOSPLIT,$8-12
428437
CALL runtime·unixnano(SB)

src/runtime/sys_windows_amd64.s

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -381,10 +381,10 @@ TEXT runtime·settls(SB),NOSPLIT,$0
381381
MOVQ DI, 0x28(GS)
382382
RET
383383

384-
// Sleep duration is in 100ns units.
385-
TEXT runtime·usleep1(SB),NOSPLIT,$0
386-
MOVL usec+0(FP), BX
387-
MOVQ $runtime·usleep2(SB), AX // to hide from 6l
384+
// func onosstack(fn unsafe.Pointer, arg uint32)
385+
TEXT runtime·onosstack(SB),NOSPLIT,$0
386+
MOVQ fn+0(FP), AX // to hide from 6l
387+
MOVL arg+8(FP), BX
388388

389389
// Execute call on m->g0 stack, in case we are not actually
390390
// calling a system call wrapper, like when running under WINE.
@@ -445,6 +445,18 @@ TEXT runtime·usleep2(SB),NOSPLIT,$48
445445
MOVQ 40(SP), SP
446446
RET
447447

448+
// Runs on OS stack.
449+
// switchtothread calls the function whose address is stored in
// runtime·_SwitchToThread, passing no arguments, on a stack frame
// laid out for the Windows calling convention.
TEXT runtime·switchtothread(SB),NOSPLIT,$0
450+
// Keep the incoming SP in AX until it can be stored in the new frame.
MOVQ SP, AX
451+
ANDQ $~15, SP // alignment as per Windows requirement
452+
SUBQ $(48), SP // room for SP and 4 args as per Windows requirement
453+
// plus one extra word to keep stack 16 bytes aligned
454+
// Save the original SP just above the 32 bytes reserved for the 4 args.
MOVQ AX, 32(SP)
455+
// Load the imported SwitchToThread entry point and call it indirectly.
MOVQ runtime·_SwitchToThread(SB), AX
456+
CALL AX
457+
// Restore the original SP from the slot saved above.
MOVQ 32(SP), SP
458+
RET
459+
448460
// func now() (sec int64, nsec int32)
449461
TEXT time·now(SB),NOSPLIT,$8-12
450462
CALL runtime·unixnano(SB)

src/runtime/syscall_windows_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,3 +864,147 @@ func TestLoadLibraryEx(t *testing.T) {
864864
t.Skipf("LoadLibraryEx not usable, but not expected. (LoadLibraryEx=%v; flags=%v)",
865865
have, flags)
866866
}
867+
868+
// Lazily loaded DLLs and procedures used by the benchmarks below.
var (
869+
modwinmm = syscall.NewLazyDLL("winmm.dll")
870+
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
871+
872+
// winmm.dll procedures wrapped by timeBeginPeriod / timeEndPeriod.
proctimeBeginPeriod = modwinmm.NewProc("timeBeginPeriod")
873+
proctimeEndPeriod = modwinmm.NewProc("timeEndPeriod")
874+
875+
// kernel32.dll procedures wrapped by createEvent / setEvent.
procCreateEvent = modkernel32.NewProc("CreateEventW")
876+
procSetEvent = modkernel32.NewProc("SetEvent")
877+
)
878+
879+
// timeBeginPeriod calls winmm.dll's timeBeginPeriod with period as its
// single argument. The call's results are discarded.
// NOTE(review): period is presumably the requested timer resolution in
// milliseconds, per the Windows API — confirm against the API docs.
func timeBeginPeriod(period uint32) {
880+
syscall.Syscall(proctimeBeginPeriod.Addr(), 1, uintptr(period), 0, 0)
881+
}
882+
883+
// timeEndPeriod calls winmm.dll's timeEndPeriod with period as its
// single argument. The call's results are discarded.
// NOTE(review): presumably this must be paired with an earlier
// timeBeginPeriod call using the same period — confirm against the API docs.
func timeEndPeriod(period uint32) {
884+
syscall.Syscall(proctimeEndPeriod.Addr(), 1, uintptr(period), 0, 0)
885+
}
886+
887+
// createEvent calls kernel32's CreateEventW with all four arguments zero
// and returns the resulting handle. If the call returns 0, it returns a
// zero handle and the syscall's error number as the error.
// NOTE(review): per the CreateEventW documentation, all-zero arguments
// should mean an unnamed, auto-reset, initially non-signaled event with
// default security — confirm.
func createEvent() (syscall.Handle, error) {
888+
r0, _, e0 := syscall.Syscall6(procCreateEvent.Addr(), 4, 0, 0, 0, 0, 0, 0)
889+
if r0 == 0 {
890+
return 0, syscall.Errno(e0)
891+
}
892+
return syscall.Handle(r0), nil
893+
}
894+
895+
// setEvent calls kernel32's SetEvent on handle h. It returns nil when the
// call returns non-zero, and the syscall's error number otherwise.
func setEvent(h syscall.Handle) error {
896+
r0, _, e0 := syscall.Syscall(procSetEvent.Addr(), 1, uintptr(h), 0, 0)
897+
if r0 == 0 {
898+
return syscall.Errno(e0)
899+
}
900+
return nil
901+
}
902+
903+
// benchChanToSyscallPing measures b.N round trips between two goroutines:
// the benchmark goroutine signals an event with setEvent, and a helper
// goroutine blocked in WaitForSingleObject wakes up and replies over an
// unbuffered channel.
// NOTE(review): the event handle is never closed in this function.
func benchChanToSyscallPing(b *testing.B) {
904+
ch := make(chan int)
905+
event, err := createEvent()
906+
if err != nil {
907+
b.Fatal(err)
908+
}
909+
// Responder: wait for the event, then answer on the channel.
go func() {
910+
for i := 0; i < b.N; i++ {
911+
syscall.WaitForSingleObject(event, syscall.INFINITE)
912+
ch <- 1
913+
}
914+
}()
915+
// Initiator: signal the event, then wait for the channel reply.
for i := 0; i < b.N; i++ {
916+
err := setEvent(event)
917+
if err != nil {
918+
b.Fatal(err)
919+
}
920+
<-ch
921+
}
922+
}
923+
924+
// BenchmarkChanToSyscallPing1ms runs benchChanToSyscallPing between
// timeBeginPeriod(1) and timeEndPeriod(1).
// NOTE(review): timeEndPeriod is not deferred, so it is skipped if the
// helper exits through b.Fatal.
func BenchmarkChanToSyscallPing1ms(b *testing.B) {
925+
timeBeginPeriod(1)
926+
benchChanToSyscallPing(b)
927+
timeEndPeriod(1)
928+
}
929+
930+
// BenchmarkChanToSyscallPing15ms runs benchChanToSyscallPing without
// calling timeBeginPeriod first.
// NOTE(review): "15ms" presumably refers to the default Windows timer
// period observed by the author — confirm.
func BenchmarkChanToSyscallPing15ms(b *testing.B) {
931+
benchChanToSyscallPing(b)
932+
}
933+
934+
// benchSyscallToSyscallPing measures b.N round trips between two
// goroutines that communicate only through two events: the benchmark
// goroutine signals event1 and waits on event2, while a helper goroutine
// waits on event1 and signals event2.
// NOTE(review): neither event handle is closed in this function.
func benchSyscallToSyscallPing(b *testing.B) {
935+
event1, err := createEvent()
936+
if err != nil {
937+
b.Fatal(err)
938+
}
939+
event2, err := createEvent()
940+
if err != nil {
941+
b.Fatal(err)
942+
}
943+
// Responder: wait for event1, then signal event2.
go func() {
944+
for i := 0; i < b.N; i++ {
945+
syscall.WaitForSingleObject(event1, syscall.INFINITE)
946+
err := setEvent(event2)
947+
if err != nil {
948+
// NOTE(review): b.Fatal is called here from a goroutine other than
// the one running the benchmark; the testing package documents that
// FailNow must be called from the benchmark's own goroutine.
b.Fatal(err)
949+
}
950+
}
951+
}()
952+
// Initiator: signal event1, then wait for event2.
for i := 0; i < b.N; i++ {
953+
err := setEvent(event1)
954+
if err != nil {
955+
b.Fatal(err)
956+
}
957+
syscall.WaitForSingleObject(event2, syscall.INFINITE)
958+
}
959+
}
960+
961+
// BenchmarkSyscallToSyscallPing1ms runs benchSyscallToSyscallPing between
// timeBeginPeriod(1) and timeEndPeriod(1).
// NOTE(review): timeEndPeriod is not deferred, so it is skipped if the
// helper exits through b.Fatal.
func BenchmarkSyscallToSyscallPing1ms(b *testing.B) {
962+
timeBeginPeriod(1)
963+
benchSyscallToSyscallPing(b)
964+
timeEndPeriod(1)
965+
}
966+
967+
// BenchmarkSyscallToSyscallPing15ms runs benchSyscallToSyscallPing without
// calling timeBeginPeriod first.
func BenchmarkSyscallToSyscallPing15ms(b *testing.B) {
968+
benchSyscallToSyscallPing(b)
969+
}
970+
971+
// benchChanToChanPing measures b.N round trips between two goroutines
// over a pair of unbuffered channels: the benchmark goroutine sends on
// ch1 and receives on ch2, while a helper goroutine does the reverse.
func benchChanToChanPing(b *testing.B) {
972+
ch1 := make(chan int)
973+
ch2 := make(chan int)
974+
// Responder: receive the ping on ch1, reply on ch2.
go func() {
975+
for i := 0; i < b.N; i++ {
976+
<-ch1
977+
ch2 <- 1
978+
}
979+
}()
980+
// Initiator: send the ping on ch1, wait for the reply on ch2.
for i := 0; i < b.N; i++ {
981+
ch1 <- 1
982+
<-ch2
983+
}
984+
}
985+
986+
// BenchmarkChanToChanPing1ms runs benchChanToChanPing between
// timeBeginPeriod(1) and timeEndPeriod(1).
func BenchmarkChanToChanPing1ms(b *testing.B) {
987+
timeBeginPeriod(1)
988+
benchChanToChanPing(b)
989+
timeEndPeriod(1)
990+
}
991+
992+
// BenchmarkChanToChanPing15ms runs benchChanToChanPing without calling
// timeBeginPeriod first.
func BenchmarkChanToChanPing15ms(b *testing.B) {
993+
benchChanToChanPing(b)
994+
}
995+
996+
// benchOsYield calls runtime.OsYield b.N times in a tight loop, measuring
// the cost of a single yield call.
func benchOsYield(b *testing.B) {
997+
for i := 0; i < b.N; i++ {
998+
runtime.OsYield()
999+
}
1000+
}
1001+
1002+
// BenchmarkOsYield1ms runs benchOsYield between timeBeginPeriod(1) and
// timeEndPeriod(1).
func BenchmarkOsYield1ms(b *testing.B) {
1003+
timeBeginPeriod(1)
1004+
benchOsYield(b)
1005+
timeEndPeriod(1)
1006+
}
1007+
1008+
// BenchmarkOsYield15ms runs benchOsYield without calling timeBeginPeriod
// first.
func BenchmarkOsYield15ms(b *testing.B) {
1009+
benchOsYield(b)
1010+
}

0 commit comments

Comments
 (0)