Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Listener catch-up fix #450

Merged
merged 6 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion relayer/listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func newListener(
)
return nil, err
}
sub := vms.NewSubscriber(logger, config.ParseVM(sourceBlockchain.VM), blockchainID, ethWSClient)
sub := vms.NewSubscriber(logger, config.ParseVM(sourceBlockchain.VM), blockchainID, ethWSClient, ethRPCClient)

// Marks when the listener has finished the catch-up process on startup.
// Until that time, we do not know the order in which messages are processed,
Expand Down
44 changes: 37 additions & 7 deletions vms/evm/subscriber.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ const (
maxClientSubscriptionBuffer = 20000
subscribeRetryTimeout = 1 * time.Second
MaxBlocksPerRequest = 200
rpcMaxRetries = 5
)

// subscriber implements Subscriber
type subscriber struct {
ethClient ethclient.Client
wsClient ethclient.Client
rpcClient ethclient.Client
blockchainID ids.ID
headers chan *types.Header
sub interfaces.Subscription
Expand All @@ -35,10 +37,16 @@ type subscriber struct {
}

// NewSubscriber returns a subscriber
func NewSubscriber(logger logging.Logger, blockchainID ids.ID, ethClient ethclient.Client) *subscriber {
func NewSubscriber(
logger logging.Logger,
blockchainID ids.ID,
wsClient ethclient.Client,
rpcClient ethclient.Client,
) *subscriber {
return &subscriber{
blockchainID: blockchainID,
ethClient: ethClient,
wsClient: wsClient,
rpcClient: rpcClient,
logger: logger,
headers: make(chan *types.Header, maxClientSubscriptionBuffer),
}
Expand All @@ -63,7 +71,7 @@ func (s *subscriber) ProcessFromHeight(height *big.Int, done chan bool) {
}

// Grab the latest block before filtering logs so we don't miss any before updating the db
latestBlockHeight, err := s.ethClient.BlockNumber(context.Background())
latestBlockHeight, err := s.rpcClient.BlockNumber(context.Background())
if err != nil {
s.logger.Error(
"Failed to get latest block",
Expand Down Expand Up @@ -103,10 +111,10 @@ func (s *subscriber) processBlockRange(
fromBlock, toBlock *big.Int,
) error {
for i := fromBlock.Int64(); i <= toBlock.Int64(); i++ {
header, err := s.ethClient.HeaderByNumber(context.Background(), big.NewInt(i))
header, err := s.getHeaderByNumberRetryable(big.NewInt(i))
if err != nil {
s.logger.Error(
"Failed to get header by number",
"Failed to get header by number after max attempts",
zap.String("blockchainID", s.blockchainID.String()),
zap.Error(err),
)
Expand All @@ -117,6 +125,28 @@ func (s *subscriber) processBlockRange(
return nil
}

func (s *subscriber) getHeaderByNumberRetryable(headerNumber *big.Int) (*types.Header, error) {
var err error
var header *types.Header
for i := 0; i < rpcMaxRetries; i++ {
header, err = s.rpcClient.HeaderByNumber(context.Background(), headerNumber)
if err == nil {
return header, nil
}
s.logger.Warn(
"Failed to get header by number",
zap.String("blockchainID", s.blockchainID.String()),
zap.Int("attempt", i),
zap.Error(err),
)
// Sleep if this wasn't the last retry
if i < rpcMaxRetries-1 {
time.Sleep(time.Duration(i+1) * time.Second)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it might be clearer to specify the total sleep duration (10 seconds across 5 attempts) as a constant. Is there a reason why the subscriber's retry delay is constant on each retry, while the RPC retry delay grows linearly with each attempt?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, we should be consistent here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but in which direction? Constant? Ideally, for network resources, I prefer exponential backoff with jitter, but that seemed like overkill here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exponential backoff has come up in previous discussions, but each time we've reached the same conclusion: it's overkill. Since the overall number of RPC calls is low, optimizing how they're spaced hasn't been a top priority.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we were tracking that already, but apparently not. I created #453 to do so.

}
}
return nil, err
}

// Loops forever iff maxResubscribeAttempts == 0
func (s *subscriber) Subscribe(maxResubscribeAttempts int) error {
// Retry subscribing until successful. Attempt to resubscribe maxResubscribeAttempts times
Expand Down Expand Up @@ -155,7 +185,7 @@ func (s *subscriber) Subscribe(maxResubscribeAttempts int) error {
}

func (s *subscriber) subscribe() error {
sub, err := s.ethClient.SubscribeNewHead(context.Background(), s.headers)
sub, err := s.wsClient.SubscribeNewHead(context.Background(), s.headers)
if err != nil {
s.logger.Error(
"Failed to subscribe to logs",
Expand Down
2 changes: 1 addition & 1 deletion vms/evm/subscriber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func makeSubscriberWithMockEthClient(t *testing.T) (*subscriber, *mock_ethclient
mockEthClient := mock_ethclient.NewMockClient(gomock.NewController(t))
blockchainID, err := ids.FromString(sourceSubnet.BlockchainID)
require.NoError(t, err)
subscriber := NewSubscriber(logger, blockchainID, mockEthClient)
subscriber := NewSubscriber(logger, blockchainID, mockEthClient, mockEthClient)

return subscriber, mockEthClient
}
Expand Down
10 changes: 8 additions & 2 deletions vms/subscriber.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,16 @@ type Subscriber interface {
}

// NewSubscriber returns a concrete Subscriber according to the VM specified by [subnetInfo]
func NewSubscriber(logger logging.Logger, vm config.VM, blockchainID ids.ID, ethClient ethclient.Client) Subscriber {
func NewSubscriber(
logger logging.Logger,
vm config.VM,
blockchainID ids.ID,
ethWSClient ethclient.Client,
ethRPCClient ethclient.Client,
) Subscriber {
switch vm {
case config.EVM:
return evm.NewSubscriber(logger, blockchainID, ethClient)
return evm.NewSubscriber(logger, blockchainID, ethWSClient, ethRPCClient)
default:
return nil
}
Expand Down