From 523dff416b94abf6812b75cebb2a4783a765617c Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 7 Oct 2024 16:01:52 -0700 Subject: [PATCH] feat(f3): Include the network name in the lease That way we don't re-use leases across networks. It's a bit racy (we ask for the manifest before we ask for the current progress) but it should be fine because at least we won't create a lease for the new network with a future instance. There's still an ABA problem if we rapidly switch back and forth between two networks but... let's just not do that? At least for the mainnet switchover, that won't be an issue because we enforce a 900 epoch silence period. I have to say, I'm not happy about this. But... we can probably just hard-code it in the future once we get rid of the dynamic manifest. --- api/api_errors.go | 10 +++++++++ api/api_full.go | 2 ++ chain/lf3/f3.go | 5 ++++- chain/lf3/participation_lease.go | 30 ++++++++++++++++++--------- chain/lf3/participation_lease_test.go | 9 ++++++-- node/modules/storageminer.go | 14 +++++++++++++ 6 files changed, 57 insertions(+), 13 deletions(-) diff --git a/api/api_errors.go b/api/api_errors.go index 65677b7680..a025e24543 100644 --- a/api/api_errors.go +++ b/api/api_errors.go @@ -16,6 +16,7 @@ const ( EF3ParticipationIssuerMismatch EF3ParticipationTooManyInstances EF3ParticipationTicketStartBeforeExisting + EF3NotReady ) var ( @@ -35,6 +36,9 @@ var ( // ErrF3ParticipationTicketStartBeforeExisting signals that participation ticket // is before the start instance of an existing lease held by the miner. ErrF3ParticipationTicketStartBeforeExisting = errF3ParticipationTicketStartBeforeExisting{} + // ErrF3NotREady signals that the F3 instance isn't ready for participation yet. The caller + // should back off and try again later. + ErrF3NotReady = errF3NotReady{} _ error = (*ErrOutOfGas)(nil) _ error = (*ErrActorNotFound)(nil) @@ -42,6 +46,7 @@ var ( _ error = (*errF3ParticipationTicketInvalid)(nil) _ error = (*errF3ParticipationTicketExpired)(nil) _ error = (*errF3ParticipationIssuerMismatch)(nil) + _ error = (*errF3NotReady)(nil) ) func init() { @@ -53,6 +58,7 @@ func init() { RPCErrors.Register(EF3ParticipationIssuerMismatch, new(*errF3ParticipationIssuerMismatch)) RPCErrors.Register(EF3ParticipationTooManyInstances, new(*errF3ParticipationTooManyInstances)) RPCErrors.Register(EF3ParticipationTicketStartBeforeExisting, new(*errF3ParticipationTicketStartBeforeExisting)) + RPCErrors.Register(EF3NotReady, new(*errF3NotReady)) } func ErrorIsIn(err error, errorTypes []error) bool { @@ -100,3 +106,7 @@ type errF3ParticipationTicketStartBeforeExisting struct{} func (errF3ParticipationTicketStartBeforeExisting) Error() string { return "ticket starts before existing lease" } + +type errF3NotReady struct{} + +func (errF3NotReady) Error() string { return "f3 isn't yet ready to participate" } diff --git a/api/api_full.go b/api/api_full.go index 83a2b7c157..34dcb6f33f 100644 --- a/api/api_full.go +++ b/api/api_full.go @@ -976,6 +976,8 @@ type F3ParticipationTicket []byte // participating in F3 consensus, detailing the session identifier, issuer, // subject, and the expiration instance. type F3ParticipationLease struct { + // Network is the name of the network this lease belongs to. + Network gpbft.NetworkName // Issuer is the identity of the node that issued the lease. Issuer peer.ID // MinerID is the actor ID of the miner that holds the lease. diff --git a/chain/lf3/f3.go b/chain/lf3/f3.go index dcb824df1c..0b702bb73a 100644 --- a/chain/lf3/f3.go +++ b/chain/lf3/f3.go @@ -79,11 +79,14 @@ func New(mctx helpers.MetricsCtx, lc fx.Lifecycle, params F3Params) (*F3, error) // maxLeasableInstances is the maximum number of leased F3 instances this node // would give out. const maxLeasableInstances = 5 + status := func() (*manifest.Manifest, gpbft.Instant) { + return module.Manifest(), module.Progress() + } fff := &F3{ inner: module, ec: ec, signer: &signer{params.Wallet}, - leaser: newParticipationLeaser(nodeId, module.Progress, maxLeasableInstances), + leaser: newParticipationLeaser(nodeId, status, maxLeasableInstances), } // Start F3 diff --git a/chain/lf3/participation_lease.go b/chain/lf3/participation_lease.go index 5c2f84757c..3df9838b97 100644 --- a/chain/lf3/participation_lease.go +++ b/chain/lf3/participation_lease.go @@ -10,23 +10,26 @@ import ( "golang.org/x/xerrors" "github.com/filecoin-project/go-f3/gpbft" + "github.com/filecoin-project/go-f3/manifest" "github.com/filecoin-project/lotus/api" ) +type f3Status = func() (*manifest.Manifest, gpbft.Instant) + type leaser struct { mutex sync.Mutex leases map[uint64]api.F3ParticipationLease issuer peer.ID - progress gpbft.Progress + status f3Status maxLeasableInstances uint64 } -func newParticipationLeaser(nodeId peer.ID, progress gpbft.Progress, maxLeasedInstances uint64) *leaser { +func newParticipationLeaser(nodeId peer.ID, status f3Status, maxLeasedInstances uint64) *leaser { return &leaser{ leases: make(map[uint64]api.F3ParticipationLease), issuer: nodeId, - progress: progress, + status: status, maxLeasableInstances: maxLeasedInstances, } } @@ -37,7 +40,11 @@ func (l *leaser) getOrRenewParticipationTicket(participant uint64, previous api. return nil, api.ErrF3ParticipationTooManyInstances } - currentInstance := l.progress().ID + manifest, instant := l.status() + if manifest == nil { + return nil, api.ErrF3NotReady + } + currentInstance := instant.ID if len(previous) != 0 { // A previous ticket is present. To avoid overlapping lease across multiple // instances for the same participant check its validity and only proceed to @@ -46,7 +53,7 @@ func (l *leaser) getOrRenewParticipationTicket(participant uint64, previous api. // - it is valid and was issued by this node. // // Otherwise, return ErrF3ParticipationIssuerMismatch to signal to the caller the need for retry. - switch _, err := l.validate(currentInstance, previous); { + switch _, err := l.validate(manifest.NetworkName, currentInstance, previous); { case errors.Is(err, api.ErrF3ParticipationTicketInvalid): // Invalid ticket means the miner must have got the ticket from a node with a potentially different version. // Refuse to issue a new ticket in case there is some other node with active lease for the miner. @@ -76,15 +83,18 @@ func (l *leaser) getOrRenewParticipationTicket(participant uint64, previous api. } func (l *leaser) participate(ticket api.F3ParticipationTicket) (api.F3ParticipationLease, error) { - currentInstance := l.progress().ID - newLease, err := l.validate(currentInstance, ticket) + manifest, instant := l.status() + if manifest == nil { + return api.F3ParticipationLease{}, api.ErrF3NotReady + } + newLease, err := l.validate(manifest.NetworkName, instant.ID, ticket) if err != nil { return api.F3ParticipationLease{}, err } l.mutex.Lock() defer l.mutex.Unlock() currentLease, found := l.leases[newLease.MinerID] - if found && currentLease.FromInstance > newLease.FromInstance { + if found && currentLease.Network == newLease.Network && currentLease.FromInstance > newLease.FromInstance { // For safety, strictly require lease start instance to never decrease. return api.F3ParticipationLease{}, api.ErrF3ParticipationTicketStartBeforeExisting } @@ -124,7 +134,7 @@ func (l *leaser) newParticipationTicket(participant uint64, from uint64, instanc return buf.Bytes(), nil } -func (l *leaser) validate(currentInstance uint64, t api.F3ParticipationTicket) (api.F3ParticipationLease, error) { +func (l *leaser) validate(currentNetwork gpbft.NetworkName, currentInstance uint64, t api.F3ParticipationTicket) (api.F3ParticipationLease, error) { var lease api.F3ParticipationLease reader := bytes.NewReader(t) if err := lease.UnmarshalCBOR(reader); err != nil { @@ -134,7 +144,7 @@ func (l *leaser) validate(currentInstance uint64, t api.F3ParticipationTicket) ( // Combine the errors to remove significance of the order by which they are // checked outside if this function. var err error - if currentInstance > lease.FromInstance+lease.ValidityTerm { + if currentNetwork != lease.Network || currentInstance > lease.FromInstance+lease.ValidityTerm { err = multierr.Append(err, api.ErrF3ParticipationTicketExpired) } if l.issuer != lease.Issuer { diff --git a/chain/lf3/participation_lease_test.go b/chain/lf3/participation_lease_test.go index 17a9c07055..731c1ba5be 100644 --- a/chain/lf3/participation_lease_test.go +++ b/chain/lf3/participation_lease_test.go @@ -2,11 +2,14 @@ package lf3 import ( "testing" + "time" + "github.com/ipfs/go-cid" "github.com/libp2p/go-libp2p/core/peer" "github.com/stretchr/testify/require" "github.com/filecoin-project/go-f3/gpbft" + "github.com/filecoin-project/go-f3/manifest" "github.com/filecoin-project/lotus/api" ) @@ -157,10 +160,12 @@ func TestLeaser(t *testing.T) { }) } +var testManifest = NewManifest("fakenet", 30, 30, 30*time.Second, cid.Undef) + type mockProgress struct{ currentInstance uint64 } -func (m *mockProgress) Progress() gpbft.Instant { - return gpbft.Instant{ +func (m *mockProgress) Progress() (*manifest.Manifest, gpbft.Instant) { + return testManifest, gpbft.Instant{ ID: m.currentInstance, Round: 0, Phase: gpbft.INITIAL_PHASE, diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index c479f8386f..9cf333e124 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -456,6 +456,20 @@ func (p *f3Participator) tryF3Participate(ctx context.Context, ticket api.F3Part func (p *f3Participator) awaitLeaseExpiry(ctx context.Context, lease api.F3ParticipationLease) error { p.backoff.Reset() for ctx.Err() == nil { + switch manifest, err := p.node.F3GetManifest(ctx); { + case errors.Is(err, api.ErrF3Disabled): + log.Errorw("Cannot await F3 participation lease expiry as F3 is disabled.", "err", err) + return xerrors.Errorf("awaiting F3 participation lease expiry: %w", err) + case err != nil: + if p.backoff.Attempt() > float64(p.maxCheckProgressAttempts) { + log.Errorw("Too many failures while attempting to check F3 progress. Restarting participation.", "attempts", p.backoff.Attempt(), "err", err) + return nil + } + log.Errorw("Failed to check F3 progress while awaiting lease expiry. Retrying after backoff.", "attempts", p.backoff.Attempt(), "backoff", p.backoff.Duration(), "err", err) + p.backOff(ctx) + case manifest.NetworkName != lease.Network: + return nil + } switch progress, err := p.node.F3GetProgress(ctx); { case errors.Is(err, api.ErrF3Disabled): log.Errorw("Cannot await F3 participation lease expiry as F3 is disabled.", "err", err)