diff --git a/src/sync/pool.go b/src/sync/pool.go
index 3359aba57b312e..523b6b08335031 100644
--- a/src/sync/pool.go
+++ b/src/sync/pool.go
@@ -5,6 +5,7 @@
 package sync
 
 import (
+	"internal/cpu"
 	"internal/race"
 	"runtime"
 	"sync/atomic"
@@ -76,6 +77,7 @@ type poolLocal struct {
 }
 
 // from runtime
+//
 //go:linkname runtime_randn runtime.randn
 func runtime_randn(n uint32) uint32
 
@@ -221,8 +223,9 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
 	// Retry under the mutex.
 	// Can not lock the mutex while pinned.
 	runtime_procUnpin()
-	allPoolsMu.Lock()
-	defer allPoolsMu.Unlock()
+	a := &allPools[uintptr(unsafe.Pointer(p))%shardPoolsSize]
+	a.lock.Lock()
+	defer a.lock.Unlock()
 	pid := runtime_procPin()
 	// poolCleanup won't be called while we are pinned.
 	s := p.localSize
@@ -231,7 +234,7 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
 		return indexLocal(l, pid), pid
 	}
 	if p.local == nil {
-		allPools = append(allPools, p)
+		a.s = append(a.s, p)
 	}
 	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
 	size := runtime.GOMAXPROCS(0)
@@ -249,35 +252,47 @@ func poolCleanup() {
 	// pinned section (in effect, this has all Ps pinned).
 
 	// Drop victim caches from all pools.
-	for _, p := range oldPools {
-		p.victim = nil
-		p.victimSize = 0
+	for i, s := range oldPools {
+		for _, p := range s.s {
+			p.victim = nil
+			p.victimSize = 0
+		}
+		oldPools[i].s = nil
 	}
 
 	// Move primary cache to victim cache.
-	for _, p := range allPools {
-		p.victim = p.local
-		p.victimSize = p.localSize
-		p.local = nil
-		p.localSize = 0
+	for i := range allPools {
+		for _, p := range allPools[i].s {
+			p.victim = p.local
+			p.victimSize = p.localSize
+			p.local = nil
+			p.localSize = 0
+		}
+		oldPools[i].s = allPools[i].s
+		allPools[i].s = nil
 	}
 
 	// The pools with non-empty primary caches now have non-empty
 	// victim caches and no pools have primary caches.
-	oldPools, allPools = allPools, nil
 }
 
-var (
-	allPoolsMu Mutex
+const shardPoolsSize = 256
 
+var (
 	// allPools is the set of pools that have non-empty primary
-	// caches. Protected by either 1) allPoolsMu and pinning or 2)
+	// caches. Protected by either 1) lock of elem or 2)
 	// STW.
-	allPools []*Pool
+	allPools [shardPoolsSize]struct {
+		lock Mutex
+		s    []*Pool
+		_    [cpu.CacheLinePadSize - unsafe.Sizeof([]*Pool{}) - unsafe.Sizeof(Mutex{})]int8
+	}
 
 	// oldPools is the set of pools that may have non-empty victim
 	// caches. Protected by STW.
-	oldPools []*Pool
+	oldPools [shardPoolsSize]struct {
+		s []*Pool
+	}
 )
 
 func init() {
diff --git a/src/sync/pool_test.go b/src/sync/pool_test.go
index 1b6746dbfbaf9c..ac15132e9a790c 100644
--- a/src/sync/pool_test.go
+++ b/src/sync/pool_test.go
@@ -393,3 +393,12 @@ func BenchmarkPoolExpensiveNew(b *testing.B) {
 	b.ReportMetric(float64(mstats2.NumGC-mstats1.NumGC)/float64(b.N), "GCs/op")
 	b.ReportMetric(float64(nNew)/float64(b.N), "New/op")
 }
+
+func BenchmarkPoolpinSlow(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			var p Pool
+			p.Get()
+		}
+	})
+}