Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry mismatched Rpc requests with error response #804

Merged
merged 5 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 34 additions & 15 deletions executor/extension/validator/rpc_comparator.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,36 @@ func makeRPCComparator(cfg *utils.Config, log logger.Logger) *rpcComparator {

type rpcComparator struct {
extension.NilExtension[*rpc.RequestAndResults]
cfg *utils.Config
log logger.Logger
errors []error
cfg *utils.Config
log logger.Logger
errors []error
numberOfRetriedRequests int
totalNumberOfRequests int
}

// PostTransaction compares result with recording. If ContinueOnFailure
// is enabled error is saved. Otherwise, the error is returned.
func (c *rpcComparator) PostTransaction(state executor.State[*rpc.RequestAndResults], _ *executor.Context) error {
c.totalNumberOfRequests++

compareErr := compare(state)
if compareErr != nil {
// lot errors are recorded wrongly, for this case we resend the request and compare it again
if !state.Data.StateDB.IsRecovered && state.Data.Error != nil {
c.log.Debugf("retrying %v request", state.Data.Query.Method)
c.numberOfRetriedRequests++
c.log.Debugf("current ration retried against total %v/%v", c.numberOfRetriedRequests, c.totalNumberOfRequests)
state.Data.StateDB.IsRecovered = true
compareErr = retryRequest(state)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand it correctly. If RPC request fails(/result is different), then the code attempts just 1 more time the same request? Wouldn't multiple retries be useful?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My stance on this is to have our own opera archive running. Having multiple attempts doesn't guarantee stable result. If an error comes from the recording, a single retry connecting to a stable opera client should produce a correct result.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand it correctly. If RPC request fails(/result is different), then the code attempts just 1 more time the same request? Wouldn't multiple retries be useful?

Yes and no. If comparison fails, request is send to Rpc Gateway to obtain new response. Then it is compared again. From what I have tried, this has 100% recovery rate.

if compareErr == nil {
return nil
}
}

if compareErr.typ == cannotUnmarshalResult {
return nil
}

if c.cfg.ContinueOnFailure {
c.log.Warning(compareErr)
c.errors = append(c.errors, compareErr)
Expand All @@ -108,6 +128,11 @@ func (c *rpcComparator) PostTransaction(state executor.State[*rpc.RequestAndResu

// PostRun prints all caught errors.
func (c *rpcComparator) PostRun(executor.State[*rpc.RequestAndResults], *executor.Context, error) error {
// log only if continue on failure is enabled
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I presume if error occurred and it got raised in Process then the PostRun isn't called at all. If so, does it even make call then to check in here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no way Process causes error. It can possibly cause a panic and in that case this would not be called because of how executor handles panics.

if !c.cfg.ContinueOnFailure {
return nil
}

switch len(c.errors) {
case 0:
c.log.Notice("No errors found!")
Expand All @@ -122,19 +147,13 @@ func (c *rpcComparator) PostRun(executor.State[*rpc.RequestAndResults], *executo
}

func compare(state executor.State[*rpc.RequestAndResults]) *comparatorError {
var isRecovered bool

switch state.Data.Query.MethodBase {
case "getBalance":
return compareBalance(state.Data, state.Block)
case "getTransactionCount":
return compareTransactionCount(state.Data, state.Block)
case "call":
err := compareCall(state.Data, state.Block)
if err != nil && err.typ == expectedErrorGotResult && !isRecovered {
isRecovered = true
return tryRecovery(state)
}
return compareCall(state.Data, state.Block)
case "estimateGas":
// estimateGas is currently not suitable for replay since the estimation in geth is always calculated
// for current state that means recorded result and result returned by StateDB are not comparable
Expand All @@ -147,7 +166,7 @@ func compare(state executor.State[*rpc.RequestAndResults]) *comparatorError {
return nil
}

func tryRecovery(state executor.State[*rpc.RequestAndResults]) *comparatorError {
func retryRequest(state executor.State[*rpc.RequestAndResults]) *comparatorError {
payload := utils.JsonRPCRequest{
Method: state.Data.Query.Method,
Params: state.Data.Query.Params,
Expand Down Expand Up @@ -475,9 +494,9 @@ func compareStorageAt(data *rpc.RequestAndResults, block int) *comparatorError {
if data.Error != nil {
// internal error?
if data.Error.Error.Code == internalErrorCode {
return newComparatorError(dbString, data.Error.Error, data, block, internalError)
return newComparatorError(dbString, data.Error, data, block, internalError)
}
return newComparatorError(dbString, data.Error.Error, data, block, internalError)
return newComparatorError(dbString, data.Error, data, block, internalError)
}

var recordedString string
Expand Down Expand Up @@ -532,7 +551,7 @@ func newCannotSendRPCRequestErr(data *rpc.RequestAndResults, block int) *compara
"\n\tStateDB err: %v"+
"\n\tExpected result: %v"+
"\n\tExpected err: %v"+
"\n\nParams: %v", data.Query.Method, strconv.FormatInt(int64(block), 16), data.StateDB.Result, data.StateDB.Error, data.Response.Result, data.Error.Error, string(data.ParamsRaw)),
"\n\nParams: %v", data.Query.Method, strconv.FormatInt(int64(block), 16), data.StateDB.Result, data.StateDB.Error, data.Response, data.Error, string(data.ParamsRaw)),
typ: cannotSendRpcRequest,
}
}
Expand Down Expand Up @@ -580,7 +599,7 @@ func newCannotUnmarshalResult(data *rpc.RequestAndResults, block int) *comparato
"\n\tStateDB err: %v"+
"\n\tRecorded result: %v"+
"\n\tRecorded err: %v"+
"\n\nParams: %v", data.Query.Method, strconv.FormatInt(int64(block), 16), data.StateDB.Result, data.StateDB.Error, data.Response.Result, data.Error.Error, string(data.ParamsRaw)),
"\n\nParams: %v", data.Query.Method, strconv.FormatInt(int64(block), 16), data.StateDB.Result, data.StateDB.Error, data.Response, data.Error, string(data.ParamsRaw)),
typ: cannotUnmarshalResult,
}
}
Expand Down
2 changes: 1 addition & 1 deletion rpc/execute.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
type StateDBData struct {
Result any
Error error
isRecovered bool
IsRecovered bool
}

func Execute(block uint64, rec *RequestAndResults, archive state.NonCommittableStateDB, cfg *utils.Config) *StateDBData {
Expand Down