Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
gagliardetto committed Feb 26, 2025
1 parent 173c25a commit 817ff97
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 43 deletions.
53 changes: 17 additions & 36 deletions cmd-x-index-all.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"math/rand"
"os"
"path/filepath"
"sort"
"time"

"github.com/dustin/go-humanize"
Expand Down Expand Up @@ -65,6 +64,11 @@ func newCmd_Index_all() *cli.Command {
Name: "car",
Usage: "Path to a CAR file containing a single Solana epoch, or multiple split CAR files (in order) containing a single Solana epoch",
},
&cli.Uint64Flag{
Name: "epoch",
Usage: "the epoch number",
Required: true,
},
},
Subcommands: []*cli.Command{},
Action: func(c *cli.Context) error {
Expand All @@ -84,6 +88,8 @@ func newCmd_Index_all() *cli.Command {
return fmt.Errorf("index-dir is not a directory")
}

epoch := c.Uint64("epoch")

{
startedAt := time.Now()
defer func() {
Expand All @@ -95,6 +101,7 @@ func newCmd_Index_all() *cli.Command {
indexPaths, numTotalItems, err := createAllIndexes(
c.Context,
network,
epoch,
tmpDir,
carPaths,
indexDir,
Expand Down Expand Up @@ -122,6 +129,7 @@ func newCmd_Index_all() *cli.Command {
func createAllIndexes(
ctx context.Context,
network indexes.Network,
epoch uint64,
tmpDir string,
carPaths []string,
indexDir string,
Expand All @@ -143,40 +151,15 @@ func createAllIndexes(
}
klog.Infof("Root CID: %s", rootCID)

klog.Infof("Counting items in car file...")
numItems, epochObject, err := carCountItemsByFirstByte(carPaths...)
if err != nil {
return nil, 0, fmt.Errorf("failed to count items in car file: %w", err)
}
if epochObject == nil {
return nil, 0, fmt.Errorf("failed to find epoch object in the car file")
}
fmt.Println()
klog.Infof("Found items in car file:")
numTotalItems := uint64(0)
var kinds []byte
for kind := range numItems {
kinds = append(kinds, kind)
}
// sort from byte value:
sort.Slice(kinds, func(i, j int) bool {
return kinds[i] < kinds[j]
})
for _, kind := range kinds {
klog.Infof(" %s: %s items", iplddecoders.Kind(kind), humanize.Comma(int64(numItems[kind])))
numTotalItems += numItems[kind]
}
klog.Infof("Total: %s items", humanize.Comma(int64(numTotalItems)))

epoch := uint64(epochObject.Epoch)
klog.Infof("This CAR file is for epoch %d and cluster %s", epoch, network)

hardcodedNumTotalItems := uint64(1_000_000_000)
cid_to_offset_and_size, err := NewBuilder_CidToOffset(
epoch,
rootCID,
network,
tmpDir,
numTotalItems,
hardcodedNumTotalItems,
)
if err != nil {
return nil, 0, fmt.Errorf("failed to create cid_to_offset_and_size index: %w", err)
Expand All @@ -188,19 +171,19 @@ func createAllIndexes(
rootCID,
network,
tmpDir,
numItems[byte(iplddecoders.KindBlock)],
)
if err != nil {
return nil, 0, fmt.Errorf("failed to create slot_to_cid index: %w", err)
}
defer slot_to_cid.Close()

hardcodedNumTransactions := uint64(1_000_000_000) // THis is used to determine the number of buckets in the index
sig_to_cid, err := NewBuilder_SignatureToCid(
epoch,
rootCID,
network,
tmpDir,
numItems[byte(iplddecoders.KindTransaction)],
hardcodedNumTransactions,
)
if err != nil {
return nil, 0, fmt.Errorf("failed to create sig_to_cid index: %w", err)
Expand Down Expand Up @@ -292,7 +275,7 @@ func createAllIndexes(

if numIndexedOffsets%1_000_000 == 0 && numIndexedOffsets > 0 {
timeForChunk := time.Since(lastCheckpoint)
numChunksLeft := ((numTotalItems - numIndexedOffsets) / 1_000_000) + 1
numChunksLeft := ((hardcodedNumTotalItems - numIndexedOffsets) / 1_000_000) + 1
eta = timeForChunk * time.Duration(numChunksLeft)
lastCheckpoint = time.Now()
}
Expand All @@ -306,8 +289,8 @@ func createAllIndexes(
printToStderr(
fmt.Sprintf("\rIndexing: %s/%s items [%s%%] %s",
humanize.Comma(int64(numIndexedOffsets)),
humanize.Comma(int64(numTotalItems)),
humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(numTotalItems)*100, 2),
humanize.Comma(int64(hardcodedNumTotalItems)),
humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(hardcodedNumTotalItems)*100, 2),
etaString,
),
)
Expand Down Expand Up @@ -411,7 +394,7 @@ func createAllIndexes(
}
}

return paths, numTotalItems, nil
return paths, hardcodedNumTotalItems, nil
}

func greenBackground(s string) string {
Expand Down Expand Up @@ -508,7 +491,6 @@ func NewBuilder_SlotToCid(
rootCid cid.Cid,
network indexes.Network,
tmpDir string,
numItems uint64,
) (*indexes.SlotToCid_Writer, error) {
tmpDir = filepath.Join(tmpDir, "index-slot-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63()))
if err := os.MkdirAll(tmpDir, 0o755); err != nil {
Expand All @@ -519,7 +501,6 @@ func NewBuilder_SlotToCid(
rootCid,
network,
tmpDir,
numItems,
)
if err != nil {
return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err)
Expand Down
4 changes: 3 additions & 1 deletion indexes/index-cid-to-offset-and-size.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ func (w *CidToOffsetAndSize_Writer) Seal(ctx context.Context, dstDir string) err
filepath := filepath.Join(dstDir, formatFilename_CidToOffsetAndSize(w.meta.Epoch, w.meta.RootCid, w.meta.Network))
w.finalPath = filepath

file, err := os.Create(filepath)
defer os.Rename(filepath+".tmp", filepath)

file, err := os.Create(filepath + ".tmp")
if err != nil {
return fmt.Errorf("failed to create file: %w", err)
}
Expand Down
4 changes: 3 additions & 1 deletion indexes/index-pubkey-to-offset-and-size.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ func (w *PubkeyToOffsetAndSize_Writer) SealWithFilename(ctx context.Context, dst
filepath := dstFilepath
w.finalPath = filepath

file, err := os.Create(filepath)
defer os.Rename(filepath+".tmp", filepath)

file, err := os.Create(filepath + ".tmp")
if err != nil {
return fmt.Errorf("failed to create file: %w", err)
}
Expand Down
4 changes: 3 additions & 1 deletion indexes/index-sig-to-cid.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ func (w *SigToCid_Writer) Seal(ctx context.Context, dstDir string) error {
filepath := filepath.Join(dstDir, formatFilename_SigToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network))
w.finalPath = filepath

file, err := os.Create(filepath)
defer os.Rename(filepath+".tmp", filepath)

file, err := os.Create(filepath + ".tmp")
if err != nil {
return fmt.Errorf("failed to create file: %w", err)
}
Expand Down
9 changes: 6 additions & 3 deletions indexes/index-slot-to-cid.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,13 @@ func formatFilename_SlotToCid(epoch uint64, rootCid cid.Cid, network Network) st

var Kind_SlotToCid = []byte("slot-to-cid")

const SLOTS_PER_EPOCH = 432000

func NewWriter_SlotToCid(
epoch uint64,
rootCid cid.Cid,
network Network,
tmpDir string, // Where to put the temporary index files; WILL BE DELETED.
numItems uint64,
) (*SlotToCid_Writer, error) {
if !IsValidNetwork(network) {
return nil, ErrInvalidNetwork
Expand All @@ -52,7 +53,7 @@ func NewWriter_SlotToCid(
}
index, err := compactindexsized.NewBuilderSized(
tmpDir,
uint(numItems),
uint(SLOTS_PER_EPOCH),
IndexValueSize_SlotToCid,
)
if err != nil {
Expand Down Expand Up @@ -94,7 +95,9 @@ func (w *SlotToCid_Writer) Seal(ctx context.Context, dstDir string) error {
filepath := filepath.Join(dstDir, formatFilename_SlotToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network))
w.finalPath = filepath

file, err := os.Create(filepath)
defer os.Rename(filepath+".tmp", filepath)

file, err := os.Create(filepath + ".tmp")
if err != nil {
return fmt.Errorf("failed to create file: %w", err)
}
Expand Down
1 change: 0 additions & 1 deletion indexes/index-slot-to-cid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ func TestSlotToCid(t *testing.T) {
rootCid,
indexes.NetworkMainnet,
"",
numItems,
)
require.NoError(t, err)
require.NotNil(t, writer)
Expand Down

0 comments on commit 817ff97

Please sign in to comment.