Skip to content

database/sql: possible deadlock on Rows.awaitDone()/Rows.Err()/Rows.Close() after RawBytes change #64498

Open
@fernando-az-alpaca

Description

@fernando-az-alpaca

Go version

go version go1.21.4 linux/arm64

What operating system and processor architecture are you using (go env)?

GO111MODULE=''
GOARCH='arm64'
GOBIN=''
GOCACHE='/root/.cache/go-build'
GOENV='/root/.config/go/env'
GOEXE=''
GOEXPERIMENT=''
GOFLAGS=''
GOHOSTARCH='arm64'
GOHOSTOS='linux'
GOINSECURE=''
GOMODCACHE='/go/pkg/mod'
GONOPROXY=''
GONOSUMDB=''
GOOS='linux'
GOPATH='/go'
GOPRIVATE=''
GOPROXY='https://proxy.golang.org,direct'
GOROOT='/usr/local/go'
GOSUMDB='sum.golang.org'
GOTMPDIR=''
GOTOOLCHAIN='local'
GOTOOLDIR='/usr/local/go/pkg/tool/linux_arm64'
GOVCS=''
GOVERSION='go1.21.4'
GCCGO='gccgo'
AR='ar'
CC='gcc'
CXX='g++'
CGO_ENABLED='1'
GOMOD='/go/src/app/go.mod'
GOWORK=''
CGO_CFLAGS='-O2 -g'
CGO_CPPFLAGS=''
CGO_CXXFLAGS='-O2 -g'
CGO_FFLAGS='-O2 -g'
CGO_LDFLAGS='-O2 -g'
PKG_CONFIG='pkg-config'
GOGCCFLAGS='-fPIC -pthread -Wl,--no-gc-sections -fmessage-length=0 -
ffile-prefix-map=/tmp/go-build2162660666=/tmp/go-build -gno-record-g
cc-switches'

What did you do?

Note: whether or not this is legitimate bug or a usage error depends on the answer to the question: "after this change #60304 (comment), is it an error to call Rows.Err() after a context cancellation but before calling Rows.Close()? " Thanks in advance for taking a look and I apologize if I've made any mistkaes.

Note: a full docker-compose setup for the below code and additional required components is available at https://github.com/martonw/go-sql-deadlock.

package main

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	_ "github.com/jackc/pgx/v5/stdlib"
)

func main() {

	var uuid sql.RawBytes
	ctx, ctxCancel := context.WithCancel(context.Background())
	defer ctxCancel()

	db, err := sql.Open("pgx", "postgres://test:test@localhost:5432/test")
	if err != nil {
		panic(err.Error())
	}
	defer db.Close()

	rows, err := db.QueryContext(ctx, "select id /* id is an uuid */ from test limit 1")
	if err != nil {
		panic(err.Error())

	}
	defer rows.Close()

	if rows.Next() {
		rows.Scan(&uuid)
	}

	fmt.Printf("uuid %v\n", uuid)

	ctxCancel()

	fmt.Println("sleep begin")
	time.Sleep(1 * time.Second)
	fmt.Println("sleep end")

	fmt.Printf("err %v\n", rows.Err())

	fmt.Println("successful exit")

}

What did you expect to see?

No deadlocks (assuming this pattern

defer rows.Close()
rows.Next()
rows.Scan()
ctxCancel()
rows.Err()

is legitimate - and I'll add here that this pattern is used by the gorm package - and so potentially affects a wide audience - and is not as hard to hit upon as it might seem at first glance).

What did you see instead?

The code above will sometimes (but not always) hang/deadlock.

$ ./test
uuid [100 98 97 100 48 49 55 57 45 101 49 98 52 45 52 54 98 55 45 97 99 57 53 45 99 55 49 53 57 97 102 51 98 55 57 55]
sleep begin
sleep end

(... just hangs here ...)


SIGABRT: abort
PC=0x78a3c m=0 sigcode=0

(...)

goroutine 1 [sync.RWMutex.RLock]:
runtime.gopark(0x7518a0?, 0x0?, 0xc0?, 0x60?, 0x7fdc0?)
        /usr/local/go/src/runtime/proc.go:398 +0xc8 fp=0x40001d3d20 sp=0x40001d3d00 pc=0x47cb8
runtime.goparkunlock(...)
        /usr/local/go/src/runtime/proc.go:404
runtime.semacquire1(0x4000128164, 0x10?, 0x3, 0x0, 0xa?)
        /usr/local/go/src/runtime/sema.go:160 +0x208 fp=0x40001d3d70 sp=0x40001d3d20 pc=0x59298
sync.runtime_SemacquireRWMutexR(0x40001d3e08?, 0xac?, 0x400003e030?)
        /usr/local/go/src/runtime/sema.go:82 +0x28 fp=0x40001d3db0 sp=0x40001d3d70 pc=0x73a08
sync.(*RWMutex).RLock(...)
        /usr/local/go/src/sync/rwmutex.go:71
database/sql.(*Rows).Err(0x4000128120)
        /usr/local/go/src/database/sql/sql.go:3100 +0x88 fp=0x40001d3e10 sp=0x40001d3db0 pc=0xdbbe8
main.main()
        /app/main.go:49 +0x224 fp=0x40001d3f30 sp=0x40001d3e10 pc=0x33deb4
runtime.main()
        /usr/local/go/src/runtime/proc.go:267 +0x2bc fp=0x40001d3fd0 sp=0x40001d3f30 pc=0x4788c
runtime.goexit()
        /usr/local/go/src/runtime/asm_arm64.s:1197 +0x4 fp=0x40001d3fd0 sp=0x40001d3fd0 pc=0x775c4

(...)

goroutine 13 [sync.RWMutex.Lock]:
runtime.gopark(0x7518a0?, 0x58bd0?, 0xe0?, 0x61?, 0x1c77c?)
        /usr/local/go/src/runtime/proc.go:398 +0xc8 fp=0x400003dd90 sp=0x400003dd70 pc=0x47cb8
runtime.goparkunlock(...)
        /usr/local/go/src/runtime/proc.go:404
runtime.semacquire1(0x4000128160, 0x8?, 0x3, 0x0, 0x80?)
        /usr/local/go/src/runtime/sema.go:160 +0x208 fp=0x400003dde0 sp=0x400003dd90 pc=0x59298
sync.runtime_SemacquireRWMutex(0x10?, 0xa0?, 0x1?)
        /usr/local/go/src/runtime/sema.go:87 +0x28 fp=0x400003de20 sp=0x400003dde0 pc=0x73a68
sync.(*RWMutex).Lock(0x400003dfa8?)
        /usr/local/go/src/sync/rwmutex.go:152 +0xf8 fp=0x400003de50 sp=0x400003de20 pc=0x80fc8
database/sql.(*Rows).close(0x4000128120, {0x49b960, 0x739520})
        /usr/local/go/src/database/sql/sql.go:3371 +0x88 fp=0x400003dee0 sp=0x400003de50 pc=0xdc3e8
database/sql.(*Rows).awaitDone(0x4000128120, {0x4a0df0, 0x4000068140}, {0x0, 0x0}, {0x4a0df0, 0x4000068410})
        /usr/local/go/src/database/sql/sql.go:2973 +0x1a4 fp=0x400003df80 sp=0x400003dee0 pc=0xdb694
database/sql.(*Rows).initContextClose.func1()
        /usr/local/go/src/database/sql/sql.go:2949 +0x40 fp=0x400003dfd0 sp=0x400003df80 pc=0xdb4b0
runtime.goexit()
        /usr/local/go/src/runtime/asm_arm64.s:1197 +0x4 fp=0x400003dfd0 sp=0x400003dfd0 pc=0x775c4
created by database/sql.(*Rows).initContextClose in goroutine 1
        /usr/local/go/src/database/sql/sql.go:2949 +0x180

Metadata

Metadata

Assignees

No one assigned

    Labels

    NeedsInvestigationSomeone must examine and confirm this is a valid issue and not a duplicate of an existing one.

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions