-
Notifications
You must be signed in to change notification settings - Fork 117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Support Monotonic UUIDv7
Batch Generation
#191
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ import ( | |
"crypto/rand" | ||
"crypto/sha1" | ||
"encoding/binary" | ||
"errors" | ||
"hash" | ||
"io" | ||
"net" | ||
|
@@ -193,6 +194,32 @@ func NewGenWithOptions(opts ...GenOption) *Gen { | |
return gen | ||
} | ||
|
||
// MonotonicGen extends the Gen struct with a counter for batch generation. | ||
// | ||
// MonotonicGen ensures the generation of strictly monotonic UUIDs within a | ||
// batch by utilizing a counter in conjunction with timestamps. This is | ||
// particularly useful for applications requiring ordered identifiers, such | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
v6 and v7 UUIDs are already ordered so none of this is strictly necessary except to enable batch generation, and I'm not sold on the utility there. I have used this library to generate UUIDs on the order of tens of millions per second sustained over 1000s of nodes in a distributed system without ever running into a scenario where I wanted/needed to pre-allocate a block of values. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree with @dylan-bourque - I'm really not sure how useful this is relative to the increased complexity. I haven't generated millions per second, but I can't see any reason that the existing implementation doesn't do what you want. Moreover, I'm concerned that having two separate implementations of generating a new UUIDv7 (the existing public method for generating one at a time and your new private method that is called in the loop) will lead to maintenance headaches. Is there a specific situation you have encountered in which the existing implementation fails to provide monotonically increasing values? If there is, please explain in more detail to help us understand the need. |
||
// as database indices or log sequencing. | ||
type MonotonicGen struct { | ||
Gen | ||
monotonicCounter uint16 | ||
monotonicMutex sync.Mutex | ||
} | ||
|
||
// NewMonotonicGen creates a MonotonicGen instance with configurable options. | ||
// | ||
// Arguments: | ||
// - opts: Configuration options for the generator. | ||
// | ||
// Returns: | ||
// - *MonotonicGen: The configured generator. | ||
func NewMonotonicGen(opts ...GenOption) *MonotonicGen { | ||
gen := &MonotonicGen{ | ||
Gen: *NewGenWithOptions(opts...), | ||
} | ||
return gen | ||
} | ||
|
||
// WithHWAddrFunc is a GenOption that allows you to provide your own HWAddrFunc | ||
// function. | ||
// When this option is nil, the defaultHWAddrFunc is used. | ||
|
@@ -327,15 +354,15 @@ func (g *Gen) NewV6AtTime(atTime time.Time) (UUID, error) { | |
binary.BigEndian.PutUint16(u[6:], uint16(timeNow&0xfff)) // set time_low (minus four version bits) | ||
|
||
// Based on the RFC 9562 recommendation that this data be fully random and not a monotonic counter, | ||
//we do NOT support batching version 6 UUIDs. | ||
//set clock_seq (14 bits) and node (48 bits) pseudo-random bits (first 2 bits will be overridden) | ||
// we do NOT support batching version 6 UUIDs. | ||
// set clock_seq (14 bits) and node (48 bits) pseudo-random bits (first 2 bits will be overridden) | ||
if _, err = io.ReadFull(g.rand, u[8:]); err != nil { | ||
return Nil, err | ||
} | ||
|
||
u.SetVersion(V6) | ||
|
||
//overwrite first 2 bits of byte[8] for the variant | ||
// overwrite first 2 bits of byte[8] for the variant | ||
u.SetVariant(VariantRFC9562) | ||
|
||
return u, nil | ||
|
@@ -368,29 +395,93 @@ func (g *Gen) NewV7AtTime(atTime time.Time) (UUID, error) { | |
if err != nil { | ||
return Nil, err | ||
} | ||
//UUIDv7 features a 48 bit timestamp. First 32bit (4bytes) represents seconds since 1970, followed by 2 bytes for the ms granularity. | ||
u[0] = byte(ms >> 40) //1-6 bytes: big-endian unsigned number of Unix epoch timestamp | ||
// UUIDv7 features a 48-bit timestamp: the number of milliseconds since the Unix epoch, stored big-endian in bytes 0-5. | ||
u[0] = byte(ms >> 40) // 1-6 bytes: big-endian unsigned number of Unix epoch timestamp | ||
u[1] = byte(ms >> 32) | ||
u[2] = byte(ms >> 24) | ||
u[3] = byte(ms >> 16) | ||
u[4] = byte(ms >> 8) | ||
u[5] = byte(ms) | ||
|
||
//Support batching by using a monotonic pseudo-random sequence, | ||
//as described in RFC 9562 section 6.2, Method 1. | ||
//The 6th byte contains the version and partially rand_a data. | ||
//We will lose the most significant bites from the clockSeq (with SetVersion), but it is ok, | ||
//we need the least significant that contains the counter to ensure the monotonic property | ||
// Support batching by using a monotonic pseudo-random sequence, | ||
// as described in RFC 9562 section 6.2, Method 1. | ||
// The 6th byte contains the version and partially rand_a data. | ||
// We will lose the most significant bits from the clockSeq (with SetVersion), but it is ok, | ||
// we need the least significant that contains the counter to ensure the monotonic property | ||
binary.BigEndian.PutUint16(u[6:8], clockSeq) // set rand_a with clock seq which is random and monotonic | ||
|
||
//override first 4bits of u[6]. | ||
// override first 4bits of u[6]. | ||
u.SetVersion(V7) | ||
|
||
//set rand_b 64bits of pseudo-random bits (first 2 will be overridden) | ||
// set rand_b 64bits of pseudo-random bits (first 2 will be overridden) | ||
if _, err = io.ReadFull(g.rand, u[8:16]); err != nil { | ||
return Nil, err | ||
} | ||
//override first 2 bits of byte[8] for the variant | ||
// override first 2 bits of byte[8] for the variant | ||
u.SetVariant(VariantRFC9562) | ||
|
||
return u, nil | ||
} | ||
|
||
// GenerateBatchV7 creates a batch of k-sortable Version 7 UUIDs. | ||
// | ||
// Ensures strict monotonic ordering within the batch. | ||
// | ||
// Arguments: | ||
// - batchSize: Number of UUIDs to generate. | ||
// | ||
// Returns: | ||
// - []UUID: The generated UUIDs. | ||
// - error: If batch generation fails. | ||
|
||
func (g *MonotonicGen) GenerateBatchV7(batchSize int) ([]UUID, error) { | ||
if batchSize <= 0 { | ||
return nil, errors.New("batch size must be greater than zero") | ||
} | ||
|
||
uuids := make([]UUID, batchSize) | ||
|
||
for i := 0; i < batchSize; i++ { | ||
uuid, err := g.newMonotonicV7() | ||
if err != nil { | ||
return nil, err | ||
} | ||
uuids[i] = uuid | ||
} | ||
return uuids, nil | ||
} | ||
|
||
// newMonotonicV7 generates a Version 7 UUID with a monotonic counter for ordering. | ||
// | ||
// Returns: | ||
// - UUID: The generated UUID. | ||
// - error: If UUID generation fails. | ||
func (g *MonotonicGen) newMonotonicV7() (UUID, error) { | ||
var u UUID | ||
|
||
ms, clockSeq, err := g.getMonotonicClockSequence(true, g.epochFunc()) | ||
if err != nil { | ||
return Nil, err | ||
} | ||
|
||
// set the timestamp (48 bits) | ||
u[0] = byte(ms >> 40) | ||
u[1] = byte(ms >> 32) | ||
u[2] = byte(ms >> 24) | ||
u[3] = byte(ms >> 16) | ||
u[4] = byte(ms >> 8) | ||
u[5] = byte(ms) | ||
|
||
// set rand_a (clockSeq ensures monotonicity) | ||
binary.BigEndian.PutUint16(u[6:8], clockSeq) | ||
|
||
// override version and variant bits | ||
u.SetVersion(V7) | ||
|
||
// set rand_b (64 random bits) | ||
if _, err := io.ReadFull(g.rand, u[8:16]); err != nil { | ||
return Nil, err | ||
} | ||
u.SetVariant(VariantRFC9562) | ||
|
||
return u, nil | ||
|
@@ -434,6 +525,40 @@ func (g *Gen) getClockSequence(useUnixTSMs bool, atTime time.Time) (uint64, uint | |
return timeNow, g.clockSequence, nil | ||
} | ||
|
||
// getMonotonicClockSequence returns a timestamp and clock sequence to ensure | ||
// monotonic UUID generation, even when timestamps are identical. | ||
// | ||
// Arguments: | ||
// - useUnixTSMs: Whether to use millisecond precision for the timestamp. | ||
// - atTime: The reference time. | ||
// | ||
// Returns: | ||
// - uint64: The timestamp. | ||
// - uint16: The clock sequence. | ||
// - error: If the sequence generation fails. | ||
func (g *MonotonicGen) getMonotonicClockSequence(useUnixTSMs bool, atTime time.Time) (uint64, uint16, error) { | ||
g.monotonicMutex.Lock() | ||
defer g.monotonicMutex.Unlock() | ||
|
||
var timeNow uint64 | ||
if useUnixTSMs { | ||
timeNow = uint64(atTime.UnixMilli()) | ||
} else { | ||
timeNow = g.getEpoch(atTime) | ||
} | ||
|
||
// If timeNow <= lastTime, increment the counter to ensure monotonicity. | ||
if timeNow <= g.lastTime { | ||
g.monotonicCounter++ | ||
} else { | ||
g.monotonicCounter = 0 | ||
} | ||
|
||
g.lastTime = timeNow | ||
|
||
return timeNow, g.monotonicCounter, nil | ||
} | ||
|
||
// Returns the hardware address. | ||
func (g *Gen) getHardwareAddr() ([]byte, error) { | ||
var err error | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assuming we move forward I think this function should be called
WithExplicitRandSeed()
, or something similar, to better reflect what it's actually doing.