diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index ed0d1396..f90ccdc8 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -9,7 +9,6 @@ import ( "math/rand" "os" "path/filepath" - "sort" "time" "github.com/dustin/go-humanize" @@ -65,6 +64,11 @@ func newCmd_Index_all() *cli.Command { Name: "car", Usage: "Path to a CAR file containing a single Solana epoch, or multiple split CAR files (in order) containing a single Solana epoch", }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch number", + Required: true, + }, }, Subcommands: []*cli.Command{}, Action: func(c *cli.Context) error { @@ -84,6 +88,8 @@ func newCmd_Index_all() *cli.Command { return fmt.Errorf("index-dir is not a directory") } + epoch := c.Uint64("epoch") + { startedAt := time.Now() defer func() { @@ -95,6 +101,7 @@ func newCmd_Index_all() *cli.Command { indexPaths, numTotalItems, err := createAllIndexes( c.Context, network, + epoch, tmpDir, carPaths, indexDir, @@ -122,6 +129,7 @@ func newCmd_Index_all() *cli.Command { func createAllIndexes( ctx context.Context, network indexes.Network, + epoch uint64, tmpDir string, carPaths []string, indexDir string, @@ -143,40 +151,15 @@ func createAllIndexes( } klog.Infof("Root CID: %s", rootCID) - klog.Infof("Counting items in car file...") - numItems, epochObject, err := carCountItemsByFirstByte(carPaths...) - if err != nil { - return nil, 0, fmt.Errorf("failed to count items in car file: %w", err) - } - if epochObject == nil { - return nil, 0, fmt.Errorf("failed to find epoch object in the car file") - } - fmt.Println() - klog.Infof("Found items in car file:") - numTotalItems := uint64(0) - var kinds []byte - for kind := range numItems { - kinds = append(kinds, kind) - } - // sort from byte value: - sort.Slice(kinds, func(i, j int) bool { - return kinds[i] < kinds[j] - }) - for _, kind := range kinds { - klog.Infof(" %s: %s items", iplddecoders.Kind(kind), humanize.Comma(int64(numItems[kind]))) - numTotalItems += numItems[kind] - } - klog.Infof("Total: %s items", humanize.Comma(int64(numTotalItems))) - - epoch := uint64(epochObject.Epoch) klog.Infof("This CAR file is for epoch %d and cluster %s", epoch, network) + hardcodedNumTotalItems := uint64(1_000_000_000) cid_to_offset_and_size, err := NewBuilder_CidToOffset( epoch, rootCID, network, tmpDir, - numTotalItems, + hardcodedNumTotalItems, ) if err != nil { return nil, 0, fmt.Errorf("failed to create cid_to_offset_and_size index: %w", err) @@ -188,19 +171,19 @@ func createAllIndexes( rootCID, network, tmpDir, - numItems[byte(iplddecoders.KindBlock)], ) if err != nil { return nil, 0, fmt.Errorf("failed to create slot_to_cid index: %w", err) } defer slot_to_cid.Close() + hardcodedNumTransactions := uint64(1_000_000_000) // THis is used to determine the number of buckets in the index sig_to_cid, err := NewBuilder_SignatureToCid( epoch, rootCID, network, tmpDir, - numItems[byte(iplddecoders.KindTransaction)], + hardcodedNumTransactions, ) if err != nil { return nil, 0, fmt.Errorf("failed to create sig_to_cid index: %w", err) @@ -292,7 +275,7 @@ func createAllIndexes( if numIndexedOffsets%1_000_000 == 0 && numIndexedOffsets > 0 { timeForChunk := time.Since(lastCheckpoint) - numChunksLeft := ((numTotalItems - numIndexedOffsets) / 1_000_000) + 1 + numChunksLeft := ((hardcodedNumTotalItems - numIndexedOffsets) / 1_000_000) + 1 eta = timeForChunk * time.Duration(numChunksLeft) lastCheckpoint = time.Now() } @@ -306,8 +289,8 @@ func createAllIndexes( printToStderr( fmt.Sprintf("\rIndexing: %s/%s items [%s%%] %s", humanize.Comma(int64(numIndexedOffsets)), - humanize.Comma(int64(numTotalItems)), - humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(numTotalItems)*100, 2), + humanize.Comma(int64(hardcodedNumTotalItems)), + humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(hardcodedNumTotalItems)*100, 2), etaString, ), ) @@ -411,7 +394,7 @@ func createAllIndexes( } } - return paths, numTotalItems, nil + return paths, hardcodedNumTotalItems, nil } func greenBackground(s string) string { @@ -508,7 +491,6 @@ func NewBuilder_SlotToCid( rootCid cid.Cid, network indexes.Network, tmpDir string, - numItems uint64, ) (*indexes.SlotToCid_Writer, error) { tmpDir = filepath.Join(tmpDir, "index-slot-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { @@ -519,7 +501,6 @@ func NewBuilder_SlotToCid( rootCid, network, tmpDir, - numItems, ) if err != nil { return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err) diff --git a/indexes/index-cid-to-offset-and-size.go b/indexes/index-cid-to-offset-and-size.go index 513560f1..4daac3b9 100644 --- a/indexes/index-cid-to-offset-and-size.go +++ b/indexes/index-cid-to-offset-and-size.go @@ -97,7 +97,9 @@ func (w *CidToOffsetAndSize_Writer) Seal(ctx context.Context, dstDir string) err filepath := filepath.Join(dstDir, formatFilename_CidToOffsetAndSize(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) w.finalPath = filepath - file, err := os.Create(filepath) + defer os.Rename(filepath+".tmp", filepath) + + file, err := os.Create(filepath + ".tmp") if err != nil { return fmt.Errorf("failed to create file: %w", err) } diff --git a/indexes/index-pubkey-to-offset-and-size.go b/indexes/index-pubkey-to-offset-and-size.go index 2bd12ebe..4b4707b0 100644 --- a/indexes/index-pubkey-to-offset-and-size.go +++ b/indexes/index-pubkey-to-offset-and-size.go @@ -103,7 +103,9 @@ func (w *PubkeyToOffsetAndSize_Writer) SealWithFilename(ctx context.Context, dst filepath := dstFilepath w.finalPath = filepath - file, err := os.Create(filepath) + defer os.Rename(filepath+".tmp", filepath) + + file, err := os.Create(filepath + ".tmp") if err != nil { return fmt.Errorf("failed to create file: %w", err) } diff --git a/indexes/index-sig-to-cid.go b/indexes/index-sig-to-cid.go index 69d7558c..e6892e70 100644 --- a/indexes/index-sig-to-cid.go +++ b/indexes/index-sig-to-cid.go @@ -95,7 +95,9 @@ func (w *SigToCid_Writer) Seal(ctx context.Context, dstDir string) error { filepath := filepath.Join(dstDir, formatFilename_SigToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) w.finalPath = filepath - file, err := os.Create(filepath) + defer os.Rename(filepath+".tmp", filepath) + + file, err := os.Create(filepath + ".tmp") if err != nil { return fmt.Errorf("failed to create file: %w", err) } diff --git a/indexes/index-slot-to-cid.go b/indexes/index-slot-to-cid.go index 35ae518c..93dbf139 100644 --- a/indexes/index-slot-to-cid.go +++ b/indexes/index-slot-to-cid.go @@ -37,12 +37,13 @@ func formatFilename_SlotToCid(epoch uint64, rootCid cid.Cid, network Network) st var Kind_SlotToCid = []byte("slot-to-cid") +const SLOTS_PER_EPOCH = 432000 + func NewWriter_SlotToCid( epoch uint64, rootCid cid.Cid, network Network, tmpDir string, // Where to put the temporary index files; WILL BE DELETED. - numItems uint64, ) (*SlotToCid_Writer, error) { if !IsValidNetwork(network) { return nil, ErrInvalidNetwork @@ -52,7 +53,7 @@ func NewWriter_SlotToCid( } index, err := compactindexsized.NewBuilderSized( tmpDir, - uint(numItems), + uint(SLOTS_PER_EPOCH), IndexValueSize_SlotToCid, ) if err != nil { @@ -94,7 +95,9 @@ func (w *SlotToCid_Writer) Seal(ctx context.Context, dstDir string) error { filepath := filepath.Join(dstDir, formatFilename_SlotToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) w.finalPath = filepath - file, err := os.Create(filepath) + defer os.Rename(filepath+".tmp", filepath) + + file, err := os.Create(filepath + ".tmp") if err != nil { return fmt.Errorf("failed to create file: %w", err) } diff --git a/indexes/index-slot-to-cid_test.go b/indexes/index-slot-to-cid_test.go index a236e2ea..33df01ae 100644 --- a/indexes/index-slot-to-cid_test.go +++ b/indexes/index-slot-to-cid_test.go @@ -25,7 +25,6 @@ func TestSlotToCid(t *testing.T) { rootCid, indexes.NetworkMainnet, "", - numItems, ) require.NoError(t, err) require.NotNil(t, writer)