
Commit c123b28

optimize GLS
Signed-off-by: Achille Roussel <[email protected]>
achille-roussel committed Sep 19, 2023
1 parent 92aafe1 commit c123b28
Showing 2 changed files with 73 additions and 27 deletions.
.gitignore (19 additions, 0 deletions)
@@ -0,0 +1,19 @@
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+*.wasm
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Emacs
+*~
gls.go (54 additions, 27 deletions)
@@ -1,49 +1,76 @@
 package coroutine
 
-import "sync"
+import (
+	"encoding/binary"
+	"hash/maphash"
+	"sync"
+)

 // goroutine local storage; the map contains one entry for each goroutine that
 // is started to power a coroutine.
 //
-// TODO: the global mutex is likely going to become a contention point in highly
-// parallel programs, here's how we should fix it:
-//
-// - create a sharded map with 64 buckets, each bucket contains a map
-// - use a sync.Mutex in each bucket for synchronization; cheaper than RWMutex
-// - mask the value of g to determine in which bucket its GLS is stored
-//
 // An alternative to using a global map could be to analyze the memory layout of
 // the runtime.g type and determine if there is spare room after the struct to
 // store the Context pointer: the Go memory allocator uses size classes to park
 // objects in buckets, there is often spare space after large values like the
 // runtime.g type since they will be assigned to the size class greater or equal
-// to their type size. We only need 4 or 8 bytes of spare space on 32 or 64 bit
-// architectures. This approach would remove all potential contention accessing
-// and synchronizing on global state, and would also turn the map lookups into
-// simple memory loads.
+// to their type size. We only need 8 or 16 bytes of spare space on 32 or 64 bit
+// architectures to store the context type and value. This approach would remove
+// all potential contention accessing and synchronizing on global state, and
+// would also turn the map lookups into simple memory loads.
 var (
-	gmutex sync.RWMutex
-	gstate map[uintptr]any
+	gstate glsTable
+	gseed  = maphash.MakeSeed()
 )

-func loadContext(g uintptr) any {
-	gmutex.RLock()
-	v := gstate[g]
-	gmutex.RUnlock()
+const glsTableBuckets = 64
+
+type glsTable [glsTableBuckets]glsBucket
+
+func (t *glsTable) bucket(k uintptr) *glsBucket {
+	b := [8]byte{}
+	binary.LittleEndian.PutUint64(b[:], uint64(k))
+	h := maphash.Bytes(gseed, b[:])
+	return &t[h%glsTableBuckets]
+}
+
+func (t *glsTable) load(k uintptr) any {
+	return t.bucket(k).load(k)
+}
+
+func (t *glsTable) store(k uintptr, v any) {
+	t.bucket(k).store(k, v)
+}
+
+func (t *glsTable) clear(k uintptr) {
+	t.bucket(k).clear(k)
+}
+
+type glsBucket struct {
+	values sync.Map
+}
+
+func (b *glsBucket) load(k uintptr) any {
+	v, _ := b.values.Load(k)
 	return v
 }

+func (b *glsBucket) store(k uintptr, v any) {
+	b.values.Store(k, v)
+}
+
+func (b *glsBucket) clear(k uintptr) {
+	b.values.Delete(k)
+}
+
+func loadContext(g uintptr) any {
+	return gstate.load(g)
+}
+
 func storeContext(g uintptr, c any) {
-	gmutex.Lock()
-	if gstate == nil {
-		gstate = make(map[uintptr]any)
-	}
-	gstate[g] = c
-	gmutex.Unlock()
+	gstate.store(g, c)
 }

 func clearContext(g uintptr) {
-	gmutex.Lock()
-	delete(gstate, g)
-	gmutex.Unlock()
+	gstate.clear(g)
 }
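For readers who want to try the scheme outside this codebase, here is a minimal standalone sketch of the bucketing the commit introduces: write the goroutine pointer as 8 little-endian bytes, hash them with a process-local maphash seed, reduce modulo 64, and keep one sync.Map per bucket. Everything below (bucketIndex, table, the fake g addresses in main) is illustrative naming, not code from this repository.

package main

import (
	"encoding/binary"
	"fmt"
	"hash/maphash"
	"sync"
)

const buckets = 64

var seed = maphash.MakeSeed()

// bucketIndex hashes a goroutine pointer the same way the commit does:
// serialize the uintptr to 8 little-endian bytes, hash them with the
// process-local seed, and reduce the result modulo the bucket count.
func bucketIndex(g uintptr) uint64 {
	var b [8]byte
	binary.LittleEndian.PutUint64(b[:], uint64(g))
	return maphash.Bytes(seed, b[:]) % buckets
}

// table mirrors glsTable: a fixed array of buckets, each an independent
// concurrent map.
type table [buckets]sync.Map

func (t *table) store(g uintptr, v any) { t[bucketIndex(g)].Store(g, v) }

func (t *table) load(g uintptr) any {
	v, _ := t[bucketIndex(g)].Load(g)
	return v
}

func main() {
	var t table
	// Pretend these are addresses of runtime.g structures.
	for g := uintptr(0x1000); g < 0x5000; g += 0x400 {
		t.store(g, fmt.Sprintf("ctx@%x", g))
	}
	fmt.Println(t.load(0x1c00)) // ctx@1c00
}

Hashing rather than masking g directly (as the removed TODO suggested) is plausibly deliberate: g structures come from the allocator with identical size-class alignment, so the low bits of their addresses carry little entropy, while maphash spreads the keys evenly across the 64 buckets.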

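The win is on the read path: the old code took a global sync.RWMutex on every loadContext, an atomic read-modify-write that all goroutines contend on, while the new code does a mostly lock-free sync.Map load in one of 64 independent buckets. A rough, self-contained benchmark sketch of the two designs (illustrative names, not code from this repository):

package glsbench

import (
	"sync"
	"testing"
)

// rwMap models the old design: one RWMutex guarding one map.
type rwMap struct {
	mu sync.RWMutex
	m  map[uintptr]any
}

func (s *rwMap) load(k uintptr) any {
	s.mu.RLock()
	v := s.m[k]
	s.mu.RUnlock()
	return v
}

// sharded models the new design: 64 sync.Maps selected by key.
type sharded [64]sync.Map

func (s *sharded) load(k uintptr) any {
	v, _ := s[k%64].Load(k)
	return v
}

func BenchmarkRWMutexMap(b *testing.B) {
	s := &rwMap{m: map[uintptr]any{1: "x"}}
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			s.load(1)
		}
	})
}

func BenchmarkSharded(b *testing.B) {
	var s sharded
	s[1%64].Store(uintptr(1), "x") // key 1 lands in bucket 1%64
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			s.load(1)
		}
	})
}

Running go test -bench . -cpu 1,8,32 should show the RWMutex version degrading as parallelism grows; exact numbers depend on hardware.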