Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment with speeding up podsi indexing #1806

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,5 @@ require (
gonum.org/v1/gonum v0.13.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
)

replace github.com/filecoin-project/go-data-segment v0.0.1 => github.com/ischasny/go-data-segment v0.0.0-20231107120541-53b3ec9a7c69
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,6 @@ github.com/filecoin-project/go-commp-utils/nonffi v0.0.0-20220905160352-62059082
github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03/go.mod h1:+viYnvGtUTgJRdy6oaeF4MTFKAfatX071MPDPBL11EQ=
github.com/filecoin-project/go-crypto v0.0.1 h1:AcvpSGGCgjaY8y1az6AMfKQWreF/pWO2JJGLl6gCq6o=
github.com/filecoin-project/go-crypto v0.0.1/go.mod h1:+viYnvGtUTgJRdy6oaeF4MTFKAfatX071MPDPBL11EQ=
github.com/filecoin-project/go-data-segment v0.0.1 h1:1wmDxOG4ubWQm3ZC1XI5nCon5qgSq7Ra3Rb6Dbu10Gs=
github.com/filecoin-project/go-data-segment v0.0.1/go.mod h1:H0/NKbsRxmRFBcLibmABv+yFNHdmtl5AyplYLnb0Zv4=
github.com/filecoin-project/go-data-transfer v1.15.4-boost h1:rGsPDeDk0nbzLOPn/9iCIrhLNy69Vkr9tRBcetM4kd0=
github.com/filecoin-project/go-data-transfer v1.15.4-boost/go.mod h1:S5Es9uoD+3TveYyGjxZInAF6mSQtRjNzezV7Y7Sh8X0=
github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7 h1:v+zJS5B6pA3ptWZS4t8tbt1Hz9qENnN4nVr1w99aSWc=
Expand Down Expand Up @@ -954,6 +952,8 @@ github.com/ipni/storetheindex v0.8.1 h1:3uHclkcQWlIXQx+We4tbGF/XzoZYERz3so34xQbU
github.com/ipni/storetheindex v0.8.1/go.mod h1:K4AR2bRll46YCWeGvob5foN/Z/kuovPdlUeJKOHVQHo=
github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c=
github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52/go.mod h1:fdg+/X9Gg4AsAIzWpEHwnqd+QY3b7lajxyjE1m4hkq4=
github.com/ischasny/go-data-segment v0.0.0-20231107120541-53b3ec9a7c69 h1:prNO3cadvXtRXItvhQoaJ0qfF3a1sPkZ8B6epyuCLPo=
github.com/ischasny/go-data-segment v0.0.0-20231107120541-53b3ec9a7c69/go.mod h1:4ZWx04e7pKuozznBnZarZFJQ+PeUEKPp/Lv7M6K7bog=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8=
Expand Down
239 changes: 239 additions & 0 deletions itests/dummydeal_podsi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
package itests

import (
"bytes"
"context"
"fmt"
"io"
"math/bits"
"os"
"path"
"path/filepath"
"testing"
"time"

"github.com/davecgh/go-spew/spew"
"github.com/filecoin-project/boost/itests/framework"
"github.com/filecoin-project/boost/testutil"
"github.com/filecoin-project/go-data-segment/datasegment"
commcid "github.com/filecoin-project/go-fil-commcid"
commp "github.com/filecoin-project/go-fil-commp-hashhash"
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/lotus/itests/kit"
"github.com/google/uuid"
"github.com/ipfs/go-cid"
"github.com/ipfs/go-libipfs/blocks"
"github.com/ipfs/go-unixfsnode/data/builder"
"github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/multiformats/go-multicodec"
multihash "github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
)

// TestDummyPodsiDealOnline runs an online dummy deal whose payload is a data
// segment piece built from two copies of the same CAR file, and waits until
// the deal has been added to a sector.
func TestDummyPodsiDealOnline(t *testing.T) {
	const randomFileSize = int(4e6)

	ctx := context.Background()
	log := framework.Log

	kit.QuietMiningLogs()
	framework.SetLogLevel()

	f := framework.NewTestFramework(ctx, t,
		framework.EnableLegacyDeals(true),
		framework.SetMaxStagingBytes(10e9),
		framework.SetProvisionalWalletBalances(9e18),
	)
	require.NoError(t, f.Start())
	defer f.Stop()

	require.NoError(t, f.AddClientProviderBalance(abi.NewTokenAmount(5e18)))

	workDir := t.TempDir()
	log.Debugw("using tempdir", "dir", workDir)

	// Generate the random payload file.
	randomFilepath, err := testutil.CreateRandomFile(workDir, 5, randomFileSize)
	require.NoError(t, err)

	carPath := filepath.Join(workDir, "test.car")
	segmentPath := filepath.Join(workDir, "datasegment.dat")

	// Wrap the payload in a CAR file.
	rootCid, err := createCar(t, carPath, []string{randomFilepath})
	require.NoError(t, err)

	// Aggregate the same CAR twice so the resulting piece holds two segments.
	makeDataSegmentPiece(t, segmentPath, []string{carPath, carPath})

	// Serve the working directory over HTTP so the deal data can be fetched.
	log.Debug("starting webserver")
	srv, err := testutil.HttpTestFileServer(t, workDir)
	require.NoError(t, err)
	defer srv.Close()

	log.Debug("creating dummy deal")
	dealUuid := uuid.New()

	// Propose the deal, pointing at the served data segment file.
	segmentURL := srv.URL + "/" + filepath.Base(segmentPath)
	resp, err := f.MakeDummyDeal(dealUuid, segmentPath, rootCid, segmentURL, false)
	require.NoError(t, err)
	require.True(t, resp.Result.Accepted)
	log.Debugw("got response from MarketDummyDeal", "res", spew.Sdump(resp))

	time.Sleep(2 * time.Second)

	// Block until the deal has been added to a sector.
	require.NoError(t, f.WaitForDealAddedToSector(dealUuid))
	time.Sleep(100 * time.Millisecond)
}

// makeDataSegmentPiece aggregates the files listed in subPieces into a single
// data segment piece and writes it to dataSegmentFile.
//
// Each sub piece is read once to compute its piece commitment, rewound, and
// then read a second time when the aggregate is streamed to the output file.
// Any failure aborts the test through require.
func makeDataSegmentPiece(t *testing.T, dataSegmentFile string, subPieces []string) {
	readers := make([]io.Reader, 0, len(subPieces))
	deals := make([]abi.PieceInfo, 0, len(subPieces))
	for _, sp := range subPieces {
		arg, err := os.Open(sp)
		require.NoError(t, err)
		// The handle is consumed again by the aggregate reader below, so it
		// must stay open until the function returns; the deferred close then
		// releases it (also when require aborts the test).
		defer arg.Close()

		// First pass: feed the file through the commP calculator.
		cp := new(commp.Calc)
		_, err = io.Copy(cp, arg)
		require.NoError(t, err)
		rawCommP, size, err := cp.Digest()
		require.NoError(t, err)

		// Rewind so the second pass starts from the beginning of the file.
		_, err = arg.Seek(0, io.SeekStart)
		require.NoError(t, err)

		c, err := commcid.DataCommitmentV1ToCID(rawCommP)
		require.NoError(t, err)

		readers = append(readers, arg)
		deals = append(deals, abi.PieceInfo{
			Size:     abi.PaddedPieceSize(size),
			PieceCID: c,
		})
	}
	require.NotEmpty(t, deals)

	_, size, err := datasegment.ComputeDealPlacement(deals)
	require.NoError(t, err)

	overallSize := abi.PaddedPieceSize(size)
	// we need to make this the 'next' power of 2 in order to have space for the index:
	// 1 << bitlen(x) is the smallest power of two strictly greater than x.
	next := 1 << (64 - bits.LeadingZeros64(uint64(overallSize+256)))

	a, err := datasegment.NewAggregate(abi.PaddedPieceSize(next), deals)
	require.NoError(t, err)
	out, err := a.AggregateObjectReader(readers)
	require.NoError(t, err)

	// open output file
	fo, err := os.Create(dataSegmentFile)
	require.NoError(t, err)
	defer fo.Close()

	written, err := io.Copy(fo, out)
	require.NoError(t, err)
	require.NotZero(t, written)
}

// createCar packs files into a CAR file at carFile and returns the CID of the
// resulting root.
//
// The CAR is first opened with a placeholder root of the right length, the
// unixfs blocks are written through writeFiles, the store is finalized, and
// the placeholder is then replaced by the real root in the file.
//
// t is currently unused; failures are reported through the returned error.
func createCar(t *testing.T, carFile string, files []string) (cid.Cid, error) {
	// make a cid with the right length that we eventually will patch with the root.
	hasher, err := multihash.GetHasher(multihash.SHA2_256)
	if err != nil {
		return cid.Undef, fmt.Errorf("getting sha2-256 hasher: %w", err)
	}
	digest := hasher.Sum([]byte{})
	hash, err := multihash.Encode(digest, multihash.SHA2_256)
	if err != nil {
		return cid.Undef, fmt.Errorf("encoding placeholder multihash: %w", err)
	}
	proxyRoot := cid.NewCidV1(uint64(multicodec.DagPb), hash)

	cdest, err := blockstore.OpenReadWrite(carFile, []cid.Cid{proxyRoot})
	if err != nil {
		return cid.Undef, fmt.Errorf("opening car blockstore %q: %w", carFile, err)
	}

	// Write the unixfs blocks into the store.
	root, err := writeFiles(context.Background(), false, cdest, files...)
	if err != nil {
		// Release the underlying file without finalizing; the CAR is unusable anyway.
		cdest.Discard()
		return cid.Undef, fmt.Errorf("writing files into car: %w", err)
	}

	if err := cdest.Finalize(); err != nil {
		return cid.Undef, fmt.Errorf("finalizing car %q: %w", carFile, err)
	}
	// re-open/finalize with the final root.
	return root, car.ReplaceRootsInFile(carFile, []cid.Cid{root})
}

// writeFiles builds a unixfs DAG for every entry in paths, storing the blocks
// in bs, and returns the CID of the resulting root.
//
// When noWrap is true the root of the first path is returned directly and the
// remaining paths are not processed. Otherwise each path becomes an entry
// (named after its base name) in a top-level unixfs directory, and the CID of
// that directory is returned.
//
// Errors from building the DAG or from storing blocks in bs are returned to
// the caller.
func writeFiles(ctx context.Context, noWrap bool, bs *blockstore.ReadWrite, paths ...string) (cid.Cid, error) {
	ls := cidlink.DefaultLinkSystem()
	ls.TrustedStorage = true
	// Reads are served straight from the blockstore.
	ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) {
		cl, ok := l.(cidlink.Link)
		if !ok {
			return nil, fmt.Errorf("not a cidlink")
		}
		blk, err := bs.Get(ctx, cl.Cid)
		if err != nil {
			return nil, err
		}
		return bytes.NewBuffer(blk.RawData()), nil
	}
	// Writes are buffered in memory and committed to the blockstore once the
	// link (and therefore the CID) is known.
	ls.StorageWriteOpener = func(_ ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) {
		buf := bytes.NewBuffer(nil)
		return buf, func(l ipld.Link) error {
			cl, ok := l.(cidlink.Link)
			if !ok {
				return fmt.Errorf("not a cidlink")
			}
			blk, err := blocks.NewBlockWithCid(buf.Bytes(), cl.Cid)
			if err != nil {
				return err
			}
			// Surface store failures instead of silently losing the block.
			return bs.Put(ctx, blk)
		}, nil
	}

	topLevel := make([]dagpb.PBLink, 0, len(paths))
	for _, p := range paths {
		l, size, err := builder.BuildUnixFSRecursive(p, &ls)
		if err != nil {
			return cid.Undef, err
		}
		if noWrap {
			rcl, ok := l.(cidlink.Link)
			if !ok {
				return cid.Undef, fmt.Errorf("could not interpret %s", l)
			}
			return rcl.Cid, nil
		}
		name := path.Base(p)
		entry, err := builder.BuildUnixFSDirectoryEntry(name, int64(size), l)
		if err != nil {
			return cid.Undef, err
		}
		topLevel = append(topLevel, entry)
	}

	// make a directory for the file(s).
	root, _, err := builder.BuildUnixFSDirectory(topLevel, &ls)
	if err != nil {
		// Report the failure; returning a nil error here would hand the caller
		// cid.Undef as if it were a valid root.
		return cid.Undef, err
	}
	rcl, ok := root.(cidlink.Link)
	if !ok {
		return cid.Undef, fmt.Errorf("could not interpret %s", root)
	}

	return rcl.Cid, nil
}
24 changes: 17 additions & 7 deletions itests/framework/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,10 @@ import (
var Log = logging.Logger("boosttest")

type TestFrameworkConfig struct {
Ensemble *kit.Ensemble
EnableLegacy bool
MaxStagingBytes int64
Ensemble *kit.Ensemble
EnableLegacy bool
MaxStagingBytes int64
ProvisionalWalletBalances int64
}

type TestFramework struct {
Expand Down Expand Up @@ -117,8 +118,17 @@ func WithEnsemble(e *kit.Ensemble) FrameworkOpts {
}
}

// SetProvisionalWalletBalances returns a FrameworkOpts that stores balance in
// TestFrameworkConfig.ProvisionalWalletBalances.
func SetProvisionalWalletBalances(balance int64) FrameworkOpts {
	apply := func(cfg *TestFrameworkConfig) {
		cfg.ProvisionalWalletBalances = balance
	}
	return apply
}

func NewTestFramework(ctx context.Context, t *testing.T, opts ...FrameworkOpts) *TestFramework {
fmc := &TestFrameworkConfig{}
fmc := &TestFrameworkConfig{
// default provisional balance
ProvisionalWalletBalances: 1e18,
}
for _, opt := range opts {
opt(fmc)
}
Expand Down Expand Up @@ -224,7 +234,7 @@ func (f *TestFramework) Start(opts ...ConfigOpt) error {

clientAddr, _ = fullnodeApi.WalletNew(f.ctx, chaintypes.KTBLS)

amt := abi.NewTokenAmount(1e18)
amt := abi.NewTokenAmount(f.config.ProvisionalWalletBalances)
_ = sendFunds(f.ctx, fullnodeApi, clientAddr, amt)
Log.Infof("Created client wallet %s with %d attoFil", clientAddr, amt)
wg.Done()
Expand All @@ -239,7 +249,7 @@ func (f *TestFramework) Start(opts ...ConfigOpt) error {
Log.Info("Creating publish storage deals wallet")
psdWalletAddr, _ = fullnodeApi.WalletNew(f.ctx, chaintypes.KTBLS)

amt := abi.NewTokenAmount(1e18)
amt := abi.NewTokenAmount(f.config.ProvisionalWalletBalances)
_ = sendFunds(f.ctx, fullnodeApi, psdWalletAddr, amt)
Log.Infof("Created publish storage deals wallet %s with %d attoFil", psdWalletAddr, amt)
wg.Done()
Expand All @@ -248,7 +258,7 @@ func (f *TestFramework) Start(opts ...ConfigOpt) error {
Log.Info("Creating deal collateral wallet")
dealCollatAddr, _ = fullnodeApi.WalletNew(f.ctx, chaintypes.KTBLS)

amt := abi.NewTokenAmount(1e18)
amt := abi.NewTokenAmount(f.config.ProvisionalWalletBalances)
_ = sendFunds(f.ctx, fullnodeApi, dealCollatAddr, amt)
Log.Infof("Created deal collateral wallet %s with %d attoFil", dealCollatAddr, amt)
wg.Done()
Expand Down
3 changes: 3 additions & 0 deletions node/config/def.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ func DefaultBoost() *Boost {
NChunks: 5,
AllowPrivateIPs: false,
},
ExperimentalConfig: ExperimentalConfig{
PodsiDataSegmentReaderBufferSizeBytes: 4e6, // 4MiB
},
}
return cfg
}
Expand Down
15 changes: 15 additions & 0 deletions node/config/doc_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions node/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ type Boost struct {
LotusFees FeeConfig
DAGStore lotus_config.DAGStoreConfig
IndexProvider IndexProviderConfig

// Experimental config
ExperimentalConfig ExperimentalConfig
}

func (b *Boost) GetDealmakingConfig() lotus_config.DealmakingConfig {
Expand Down Expand Up @@ -445,3 +448,9 @@ type HttpDownloadConfig struct {
// The default is false.
AllowPrivateIPs bool
}

// ExperimentalConfig groups settings for experimental features.
type ExperimentalConfig struct {
	// PodsiDataSegmentReaderBufferSizeBytes sets the size, in bytes, of the read buffer to use for podsi deal index parsing.
	// The default set by DefaultBoost is 4e6 bytes (4 MB).
	PodsiDataSegmentReaderBufferSizeBytes int64
}
3 changes: 2 additions & 1 deletion node/modules/piecedirectory.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ func NewPieceDirectory(cfg *config.Boost) func(lc fx.Lifecycle, maddr dtypes.Min
pdctx, cancel := context.WithCancel(context.Background())
pd := piecedirectory.NewPieceDirectory(store, sa,
cfg.LocalIndexDirectory.ParallelAddIndexLimit,
piecedirectory.WithAddIndexConcurrency(cfg.LocalIndexDirectory.AddIndexConcurrency))
piecedirectory.WithAddIndexConcurrency(cfg.LocalIndexDirectory.AddIndexConcurrency),
piecedirectory.WithDataSegmentReaderBufferSize(cfg.ExperimentalConfig.PodsiDataSegmentReaderBufferSizeBytes))
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
err := sa.Start(ctx, log)
Expand Down
Loading