From 6bc6c90f638a7c05c466756329532e3c9bf74ded Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Wed, 13 Sep 2023 10:04:45 -0500 Subject: [PATCH 01/65] Add occ todos / comments (#317) ## Describe your changes and provide context This adds some comments with some useful code pointers for existing logic and discussing future OCC work ## Testing performed to validate your change NA --- baseapp/abci.go | 2 ++ baseapp/baseapp.go | 4 ++++ store/cachekv/store.go | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/baseapp/abci.go b/baseapp/abci.go index 80d0c1db6..7328f7195 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -239,6 +239,8 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc // Otherwise, the ResponseDeliverTx will contain releveant error information. // Regardless of tx execution outcome, the ResponseDeliverTx will contain relevant // gas execution context. +// TODO: (occ) this is the function called from sei-chain to perform execution of a transaction. +// We'd likely replace this with an execution task that is scheduled by the OCC scheduler func (app *BaseApp) DeliverTx(ctx sdk.Context, req abci.RequestDeliverTx) (res abci.ResponseDeliverTx) { defer telemetry.MeasureSince(time.Now(), "abci", "deliver_tx") defer func() { diff --git a/baseapp/baseapp.go b/baseapp/baseapp.go index 174924baf..57b60a289 100644 --- a/baseapp/baseapp.go +++ b/baseapp/baseapp.go @@ -821,6 +821,7 @@ func (app *BaseApp) getContextForTx(mode runTxMode, txBytes []byte) sdk.Context // cacheTxContext returns a new context based off of the provided context with // a branched multi-store. +// TODO: (occ) This is an example of where we wrap the multistore with a cache multistore, and then return a modified context using that multistore func (app *BaseApp) cacheTxContext(ctx sdk.Context, txBytes []byte) (sdk.Context, sdk.CacheMultiStore) { ms := ctx.MultiStore() // TODO: https://github.com/cosmos/cosmos-sdk/issues/2824 @@ -974,6 +975,7 @@ func (app *BaseApp) runTx(ctx sdk.Context, mode runTxMode, txBytes []byte) (gInf storeAccessOpEvents := msCache.GetEvents() accessOps := ctx.TxMsgAccessOps()[acltypes.ANTE_MSG_INDEX] + // TODO: (occ) This is an example of where we do our current validation. 
Note that this validation operates on the declared dependencies for a TX / antehandler + the utilized dependencies, whereas the validation missingAccessOps := ctx.MsgValidator().ValidateAccessOperations(accessOps, storeAccessOpEvents) if len(missingAccessOps) != 0 { for op := range missingAccessOps { @@ -1118,6 +1120,8 @@ func (app *BaseApp) runMsgs(ctx sdk.Context, msgs []sdk.Msg, mode runTxMode) (*s storeAccessOpEvents := msgMsCache.GetEvents() accessOps := ctx.TxMsgAccessOps()[i] missingAccessOps := ctx.MsgValidator().ValidateAccessOperations(accessOps, storeAccessOpEvents) + // TODO: (occ) This is where we are currently validating our per message dependencies, + // whereas validation will be done holistically based on the mvkv for OCC approach if len(missingAccessOps) != 0 { for op := range missingAccessOps { ctx.Logger().Info((fmt.Sprintf("eventMsgName=%s Missing Access Operation:%s ", eventMsgName, op.String()))) diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 59cb434b4..f03ee517e 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -113,11 +113,13 @@ func (store *Store) Get(key []byte) (value []byte) { cacheValue, ok := store.cache.Get(conv.UnsafeBytesToStr(key)) if !ok { + // TODO: (occ) This is an example of when we fall through when we dont have a cache hit. Similarly, for mvkv, we'll try to serve reads from a local cache thats transient to the TX, and if its NOT present, then we read through AND mark the access (along with the value that was read) for validation value = store.parent.Get(key) store.setCacheValue(key, value, false, false) } else { value = cacheValue.Value() } + // TODO: (occ) This is an example of how we currently track accesses store.eventManager.EmitResourceAccessReadEvent("get", store.storeKey, key, value) return value @@ -239,6 +241,8 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { store.mtx.Lock() defer store.mtx.Unlock() + // TODO: (occ) Note that for iterators, we'll need to have special handling (discussed in RFC) to ensure proper validation + var parent, cache types.Iterator if ascending { From b66d23ee5468ca45674f970ee13fd8d9bfc0e1e3 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 26 Sep 2023 15:55:13 -0400 Subject: [PATCH 02/65] Multiversion Item Implementation and Tests (#318) ## Describe your changes and provide context Add multiversion store data structures file, and implement the multiversioned item ## Testing performed to validate your change Added unit tests to verify behavior --- store/multiversion/data_structures.go | 160 +++++++++++++++++ store/multiversion/data_structures_test.go | 192 +++++++++++++++++++++ 2 files changed, 352 insertions(+) create mode 100644 store/multiversion/data_structures.go create mode 100644 store/multiversion/data_structures_test.go diff --git a/store/multiversion/data_structures.go b/store/multiversion/data_structures.go new file mode 100644 index 000000000..a382a6f0a --- /dev/null +++ b/store/multiversion/data_structures.go @@ -0,0 +1,160 @@ +package multiversion + +import ( + "sync" + + "github.com/cosmos/cosmos-sdk/store/types" + "github.com/google/btree" +) + +const ( + // The approximate number of items and children per B-tree node. Tuned with benchmarks. 
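+	// Note (for illustration): with github.com/google/btree, degree 2 yields a 2-3-4 tree
+	// (1-3 items per node), so for the small number of versions expected per key it
+	// behaves roughly like a binary search tree.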
+ multiVersionBTreeDegree = 2 // should be equivalent to a binary search tree TODO: benchmark this +) + +type MultiVersionValue interface { + GetLatest() (value MultiVersionValueItem, found bool) + GetLatestBeforeIndex(index int) (value MultiVersionValueItem, found bool) + Set(index int, value []byte) + SetEstimate(index int) + Delete(index int) +} + +type MultiVersionValueItem interface { + IsDeleted() bool + IsEstimate() bool + Value() []byte + Index() int +} + +type multiVersionItem struct { + valueTree *btree.BTree // contains versions values written to this key + mtx sync.RWMutex // manages read + write accesses +} + +var _ MultiVersionValue = (*multiVersionItem)(nil) + +func NewMultiVersionItem() *multiVersionItem { + return &multiVersionItem{ + valueTree: btree.New(multiVersionBTreeDegree), + } +} + +// GetLatest returns the latest written value to the btree, and returns a boolean indicating whether it was found. +// +// A `nil` value along with `found=true` indicates a deletion that has occurred and the underlying parent store doesn't need to be hit. +func (item *multiVersionItem) GetLatest() (MultiVersionValueItem, bool) { + item.mtx.RLock() + defer item.mtx.RUnlock() + + bTreeItem := item.valueTree.Max() + if bTreeItem == nil { + return nil, false + } + valueItem := bTreeItem.(*valueItem) + return valueItem, true +} + +// GetLatest returns the latest written value to the btree prior to the index passed in, and returns a boolean indicating whether it was found. +// +// A `nil` value along with `found=true` indicates a deletion that has occurred and the underlying parent store doesn't need to be hit. +func (item *multiVersionItem) GetLatestBeforeIndex(index int) (MultiVersionValueItem, bool) { + item.mtx.RLock() + defer item.mtx.RUnlock() + + // we want to find the value at the index that is LESS than the current index + pivot := NewDeletedItem(index - 1) + + var vItem *valueItem + var found bool + // start from pivot which contains our current index, and return on first item we hit. + // This will ensure we get the latest indexed value relative to our current index + item.valueTree.DescendLessOrEqual(pivot, func(bTreeItem btree.Item) bool { + vItem = bTreeItem.(*valueItem) + found = true + return false + }) + return vItem, found +} + +func (item *multiVersionItem) Set(index int, value []byte) { + types.AssertValidValue(value) + item.mtx.Lock() + defer item.mtx.Unlock() + + valueItem := NewValueItem(index, value) + item.valueTree.ReplaceOrInsert(valueItem) +} + +func (item *multiVersionItem) Delete(index int) { + item.mtx.Lock() + defer item.mtx.Unlock() + + deletedItem := NewDeletedItem(index) + item.valueTree.ReplaceOrInsert(deletedItem) +} + +func (item *multiVersionItem) SetEstimate(index int) { + item.mtx.Lock() + defer item.mtx.Unlock() + + estimateItem := NewEstimateItem(index) + item.valueTree.ReplaceOrInsert(estimateItem) +} + +type valueItem struct { + index int + value []byte + estimate bool +} + +var _ MultiVersionValueItem = (*valueItem)(nil) + +// Index implements MultiVersionValueItem. +func (v *valueItem) Index() int { + return v.index +} + +// IsDeleted implements MultiVersionValueItem. +func (v *valueItem) IsDeleted() bool { + return v.value == nil && !v.estimate +} + +// IsEstimate implements MultiVersionValueItem. +func (v *valueItem) IsEstimate() bool { + return v.estimate +} + +// Value implements MultiVersionValueItem. 
+func (v *valueItem) Value() []byte { + return v.value +} + +// implement Less for btree.Item for valueItem +func (i *valueItem) Less(other btree.Item) bool { + return i.index < other.(*valueItem).index +} + +func NewValueItem(index int, value []byte) *valueItem { + return &valueItem{ + index: index, + value: value, + estimate: false, + } +} + +func NewEstimateItem(index int) *valueItem { + return &valueItem{ + index: index, + value: nil, + estimate: true, + } +} + +func NewDeletedItem(index int) *valueItem { + return &valueItem{ + index: index, + value: nil, + estimate: false, + } +} diff --git a/store/multiversion/data_structures_test.go b/store/multiversion/data_structures_test.go new file mode 100644 index 000000000..92975462d --- /dev/null +++ b/store/multiversion/data_structures_test.go @@ -0,0 +1,192 @@ +package multiversion_test + +import ( + "testing" + + mv "github.com/cosmos/cosmos-sdk/store/multiversion" + "github.com/stretchr/testify/require" +) + +func TestMultiversionItemGetLatest(t *testing.T) { + mvItem := mv.NewMultiVersionItem() + // We have no value, should get found == false and a nil value + value, found := mvItem.GetLatest() + require.False(t, found) + require.Nil(t, value) + + // assert that we find a value after it's set + one := []byte("one") + mvItem.Set(1, one) + value, found = mvItem.GetLatest() + require.True(t, found) + require.Equal(t, one, value.Value()) + + // assert that we STILL get the "one" value since it is the latest + zero := []byte("zero") + mvItem.Set(0, zero) + value, found = mvItem.GetLatest() + require.True(t, found) + require.Equal(t, one, value.Value()) + + // we should see a deletion as the latest now, aka nil value and found == true + mvItem.Delete(2) + value, found = mvItem.GetLatest() + require.True(t, found) + require.True(t, value.IsDeleted()) + require.Nil(t, value.Value()) + + // Overwrite the deleted value with some data + two := []byte("two") + mvItem.Set(2, two) + value, found = mvItem.GetLatest() + require.True(t, found) + require.Equal(t, two, value.Value()) +} + +func TestMultiversionItemGetByIndex(t *testing.T) { + mvItem := mv.NewMultiVersionItem() + // We have no value, should get found == false and a nil value + value, found := mvItem.GetLatestBeforeIndex(9) + require.False(t, found) + require.Nil(t, value) + + // assert that we find a value after it's set + one := []byte("one") + mvItem.Set(1, one) + // should not be found because we specifically search "LESS THAN" + value, found = mvItem.GetLatestBeforeIndex(1) + require.False(t, found) + require.Nil(t, value) + // querying from "two" should be found + value, found = mvItem.GetLatestBeforeIndex(2) + require.True(t, found) + require.Equal(t, one, value.Value()) + + // verify that querying for an earlier index returns nil + value, found = mvItem.GetLatestBeforeIndex(0) + require.False(t, found) + require.Nil(t, value) + + // assert that we STILL get the "one" value when querying with a later index + zero := []byte("zero") + mvItem.Set(0, zero) + // verify that querying for zero should ALWAYS return nil + value, found = mvItem.GetLatestBeforeIndex(0) + require.False(t, found) + require.Nil(t, value) + + value, found = mvItem.GetLatestBeforeIndex(2) + require.True(t, found) + require.Equal(t, one, value.Value()) + // verify we get zero when querying with index 1 + value, found = mvItem.GetLatestBeforeIndex(1) + require.True(t, found) + require.Equal(t, zero, value.Value()) + + // we should see a deletion as the latest now, aka nil value and found == true, but index 4 still 
returns `one` + mvItem.Delete(4) + value, found = mvItem.GetLatestBeforeIndex(4) + require.True(t, found) + require.Equal(t, one, value.Value()) + // should get deletion item for a later index + value, found = mvItem.GetLatestBeforeIndex(5) + require.True(t, found) + require.True(t, value.IsDeleted()) + + // verify that we still read the proper underlying item for an older index + value, found = mvItem.GetLatestBeforeIndex(3) + require.True(t, found) + require.Equal(t, one, value.Value()) + + // Overwrite the deleted value with some data and verify we read it properly + four := []byte("four") + mvItem.Set(4, four) + // also reads the four + value, found = mvItem.GetLatestBeforeIndex(6) + require.True(t, found) + require.Equal(t, four, value.Value()) + // still reads the `one` + value, found = mvItem.GetLatestBeforeIndex(4) + require.True(t, found) + require.Equal(t, one, value.Value()) +} + +func TestMultiversionItemEstimate(t *testing.T) { + mvItem := mv.NewMultiVersionItem() + // We have no value, should get found == false and a nil value + value, found := mvItem.GetLatestBeforeIndex(9) + require.False(t, found) + require.Nil(t, value) + + // assert that we find a value after it's set + one := []byte("one") + mvItem.Set(1, one) + // should not be found because we specifically search "LESS THAN" + value, found = mvItem.GetLatestBeforeIndex(1) + require.False(t, found) + require.Nil(t, value) + // querying from "two" should be found + value, found = mvItem.GetLatestBeforeIndex(2) + require.True(t, found) + require.False(t, value.IsEstimate()) + require.Equal(t, one, value.Value()) + // set as estimate + mvItem.SetEstimate(1) + // should not be found because we specifically search "LESS THAN" + value, found = mvItem.GetLatestBeforeIndex(1) + require.False(t, found) + require.Nil(t, value) + // querying from "two" should be found as ESTIMATE + value, found = mvItem.GetLatestBeforeIndex(2) + require.True(t, found) + require.True(t, value.IsEstimate()) + + // verify that querying for an earlier index returns nil + value, found = mvItem.GetLatestBeforeIndex(0) + require.False(t, found) + require.Nil(t, value) + + // assert that we STILL get the "one" value when querying with a later index + zero := []byte("zero") + mvItem.Set(0, zero) + // verify that querying for zero should ALWAYS return nil + value, found = mvItem.GetLatestBeforeIndex(0) + require.False(t, found) + require.Nil(t, value) + + value, found = mvItem.GetLatestBeforeIndex(2) + require.True(t, found) + require.True(t, value.IsEstimate()) + // verify we get zero when querying with index 1 + value, found = mvItem.GetLatestBeforeIndex(1) + require.True(t, found) + require.Equal(t, zero, value.Value()) + // reset one to no longer be an estiamte + mvItem.Set(1, one) + // we should see a deletion as the latest now, aka nil value and found == true, but index 4 still returns `one` + mvItem.Delete(4) + value, found = mvItem.GetLatestBeforeIndex(4) + require.True(t, found) + require.Equal(t, one, value.Value()) + // should get deletion item for a later index + value, found = mvItem.GetLatestBeforeIndex(5) + require.True(t, found) + require.True(t, value.IsDeleted()) + + // verify that we still read the proper underlying item for an older index + value, found = mvItem.GetLatestBeforeIndex(3) + require.True(t, found) + require.Equal(t, one, value.Value()) + + // Overwrite the deleted value with an estimate and verify we read it properly + mvItem.SetEstimate(4) + // also reads the four + value, found = mvItem.GetLatestBeforeIndex(6) + 
require.True(t, found) + require.True(t, value.IsEstimate()) + require.False(t, value.IsDeleted()) + // still reads the `one` + value, found = mvItem.GetLatestBeforeIndex(4) + require.True(t, found) + require.Equal(t, one, value.Value()) +} From 0048776244b3f61a27926961fbe3a0390f7bb7b9 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Fri, 29 Sep 2023 15:28:02 -0400 Subject: [PATCH 03/65] [occ] Add incarnation field (#321) ## Describe your changes and provide context This adds the incarnation field to the multiversion item data structure. ## Testing performed to validate your change updated unit tests --- store/multiversion/data_structures.go | 60 +++++++++++++--------- store/multiversion/data_structures_test.go | 36 ++++++++----- 2 files changed, 57 insertions(+), 39 deletions(-) diff --git a/store/multiversion/data_structures.go b/store/multiversion/data_structures.go index a382a6f0a..c4ca7b995 100644 --- a/store/multiversion/data_structures.go +++ b/store/multiversion/data_structures.go @@ -15,15 +15,16 @@ const ( type MultiVersionValue interface { GetLatest() (value MultiVersionValueItem, found bool) GetLatestBeforeIndex(index int) (value MultiVersionValueItem, found bool) - Set(index int, value []byte) - SetEstimate(index int) - Delete(index int) + Set(index int, incarnation int, value []byte) + SetEstimate(index int, incarnation int) + Delete(index int, incarnation int) } type MultiVersionValueItem interface { IsDeleted() bool IsEstimate() bool Value() []byte + Incarnation() int Index() int } @@ -63,7 +64,7 @@ func (item *multiVersionItem) GetLatestBeforeIndex(index int) (MultiVersionValue defer item.mtx.RUnlock() // we want to find the value at the index that is LESS than the current index - pivot := NewDeletedItem(index - 1) + pivot := &valueItem{index: index - 1} var vItem *valueItem var found bool @@ -77,35 +78,36 @@ func (item *multiVersionItem) GetLatestBeforeIndex(index int) (MultiVersionValue return vItem, found } -func (item *multiVersionItem) Set(index int, value []byte) { +func (item *multiVersionItem) Set(index int, incarnation int, value []byte) { types.AssertValidValue(value) item.mtx.Lock() defer item.mtx.Unlock() - valueItem := NewValueItem(index, value) + valueItem := NewValueItem(index, incarnation, value) item.valueTree.ReplaceOrInsert(valueItem) } -func (item *multiVersionItem) Delete(index int) { +func (item *multiVersionItem) Delete(index int, incarnation int) { item.mtx.Lock() defer item.mtx.Unlock() - deletedItem := NewDeletedItem(index) + deletedItem := NewDeletedItem(index, incarnation) item.valueTree.ReplaceOrInsert(deletedItem) } -func (item *multiVersionItem) SetEstimate(index int) { +func (item *multiVersionItem) SetEstimate(index int, incarnation int) { item.mtx.Lock() defer item.mtx.Unlock() - estimateItem := NewEstimateItem(index) + estimateItem := NewEstimateItem(index, incarnation) item.valueTree.ReplaceOrInsert(estimateItem) } type valueItem struct { - index int - value []byte - estimate bool + index int + incarnation int + value []byte + estimate bool } var _ MultiVersionValueItem = (*valueItem)(nil) @@ -115,6 +117,11 @@ func (v *valueItem) Index() int { return v.index } +// Incarnation implements MultiVersionValueItem. +func (v *valueItem) Incarnation() int { + return v.incarnation +} + // IsDeleted implements MultiVersionValueItem. 
func (v *valueItem) IsDeleted() bool { return v.value == nil && !v.estimate @@ -135,26 +142,29 @@ func (i *valueItem) Less(other btree.Item) bool { return i.index < other.(*valueItem).index } -func NewValueItem(index int, value []byte) *valueItem { +func NewValueItem(index int, incarnation int, value []byte) *valueItem { return &valueItem{ - index: index, - value: value, - estimate: false, + index: index, + incarnation: incarnation, + value: value, + estimate: false, } } -func NewEstimateItem(index int) *valueItem { +func NewEstimateItem(index int, incarnation int) *valueItem { return &valueItem{ - index: index, - value: nil, - estimate: true, + index: index, + incarnation: incarnation, + value: nil, + estimate: true, } } -func NewDeletedItem(index int) *valueItem { +func NewDeletedItem(index int, incarnation int) *valueItem { return &valueItem{ - index: index, - value: nil, - estimate: false, + index: index, + incarnation: incarnation, + value: nil, + estimate: false, } } diff --git a/store/multiversion/data_structures_test.go b/store/multiversion/data_structures_test.go index 92975462d..31696d366 100644 --- a/store/multiversion/data_structures_test.go +++ b/store/multiversion/data_structures_test.go @@ -16,20 +16,22 @@ func TestMultiversionItemGetLatest(t *testing.T) { // assert that we find a value after it's set one := []byte("one") - mvItem.Set(1, one) + mvItem.Set(1, 0, one) value, found = mvItem.GetLatest() require.True(t, found) require.Equal(t, one, value.Value()) // assert that we STILL get the "one" value since it is the latest zero := []byte("zero") - mvItem.Set(0, zero) + mvItem.Set(0, 0, zero) value, found = mvItem.GetLatest() require.True(t, found) require.Equal(t, one, value.Value()) + require.Equal(t, 1, value.Index()) + require.Equal(t, 0, value.Incarnation()) // we should see a deletion as the latest now, aka nil value and found == true - mvItem.Delete(2) + mvItem.Delete(2, 0) value, found = mvItem.GetLatest() require.True(t, found) require.True(t, value.IsDeleted()) @@ -37,10 +39,12 @@ func TestMultiversionItemGetLatest(t *testing.T) { // Overwrite the deleted value with some data two := []byte("two") - mvItem.Set(2, two) + mvItem.Set(2, 3, two) value, found = mvItem.GetLatest() require.True(t, found) require.Equal(t, two, value.Value()) + require.Equal(t, 2, value.Index()) + require.Equal(t, 3, value.Incarnation()) } func TestMultiversionItemGetByIndex(t *testing.T) { @@ -52,7 +56,7 @@ func TestMultiversionItemGetByIndex(t *testing.T) { // assert that we find a value after it's set one := []byte("one") - mvItem.Set(1, one) + mvItem.Set(1, 0, one) // should not be found because we specifically search "LESS THAN" value, found = mvItem.GetLatestBeforeIndex(1) require.False(t, found) @@ -69,7 +73,7 @@ func TestMultiversionItemGetByIndex(t *testing.T) { // assert that we STILL get the "one" value when querying with a later index zero := []byte("zero") - mvItem.Set(0, zero) + mvItem.Set(0, 0, zero) // verify that querying for zero should ALWAYS return nil value, found = mvItem.GetLatestBeforeIndex(0) require.False(t, found) @@ -84,7 +88,7 @@ func TestMultiversionItemGetByIndex(t *testing.T) { require.Equal(t, zero, value.Value()) // we should see a deletion as the latest now, aka nil value and found == true, but index 4 still returns `one` - mvItem.Delete(4) + mvItem.Delete(4, 0) value, found = mvItem.GetLatestBeforeIndex(4) require.True(t, found) require.Equal(t, one, value.Value()) @@ -100,7 +104,7 @@ func TestMultiversionItemGetByIndex(t *testing.T) { // Overwrite the 
deleted value with some data and verify we read it properly four := []byte("four") - mvItem.Set(4, four) + mvItem.Set(4, 0, four) // also reads the four value, found = mvItem.GetLatestBeforeIndex(6) require.True(t, found) @@ -120,7 +124,7 @@ func TestMultiversionItemEstimate(t *testing.T) { // assert that we find a value after it's set one := []byte("one") - mvItem.Set(1, one) + mvItem.Set(1, 0, one) // should not be found because we specifically search "LESS THAN" value, found = mvItem.GetLatestBeforeIndex(1) require.False(t, found) @@ -131,7 +135,7 @@ func TestMultiversionItemEstimate(t *testing.T) { require.False(t, value.IsEstimate()) require.Equal(t, one, value.Value()) // set as estimate - mvItem.SetEstimate(1) + mvItem.SetEstimate(1, 2) // should not be found because we specifically search "LESS THAN" value, found = mvItem.GetLatestBeforeIndex(1) require.False(t, found) @@ -140,6 +144,8 @@ func TestMultiversionItemEstimate(t *testing.T) { value, found = mvItem.GetLatestBeforeIndex(2) require.True(t, found) require.True(t, value.IsEstimate()) + require.Equal(t, 1, value.Index()) + require.Equal(t, 2, value.Incarnation()) // verify that querying for an earlier index returns nil value, found = mvItem.GetLatestBeforeIndex(0) @@ -148,7 +154,7 @@ func TestMultiversionItemEstimate(t *testing.T) { // assert that we STILL get the "one" value when querying with a later index zero := []byte("zero") - mvItem.Set(0, zero) + mvItem.Set(0, 0, zero) // verify that querying for zero should ALWAYS return nil value, found = mvItem.GetLatestBeforeIndex(0) require.False(t, found) @@ -162,9 +168,9 @@ func TestMultiversionItemEstimate(t *testing.T) { require.True(t, found) require.Equal(t, zero, value.Value()) // reset one to no longer be an estiamte - mvItem.Set(1, one) + mvItem.Set(1, 0, one) // we should see a deletion as the latest now, aka nil value and found == true, but index 4 still returns `one` - mvItem.Delete(4) + mvItem.Delete(4, 1) value, found = mvItem.GetLatestBeforeIndex(4) require.True(t, found) require.Equal(t, one, value.Value()) @@ -172,6 +178,8 @@ func TestMultiversionItemEstimate(t *testing.T) { value, found = mvItem.GetLatestBeforeIndex(5) require.True(t, found) require.True(t, value.IsDeleted()) + require.Equal(t, 4, value.Index()) + require.Equal(t, 1, value.Incarnation()) // verify that we still read the proper underlying item for an older index value, found = mvItem.GetLatestBeforeIndex(3) @@ -179,7 +187,7 @@ func TestMultiversionItemEstimate(t *testing.T) { require.Equal(t, one, value.Value()) // Overwrite the deleted value with an estimate and verify we read it properly - mvItem.SetEstimate(4) + mvItem.SetEstimate(4, 0) // also reads the four value, found = mvItem.GetLatestBeforeIndex(6) require.True(t, found) From 5d8941c3e73954bd5ff073949e50ed3d94e9a526 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Thu, 5 Oct 2023 21:18:11 -0500 Subject: [PATCH 04/65] [occ] Implement basic multiversion store (#322) ## Describe your changes and provide context This implements the multiversion with basic functionality, but still needs additional work to implement the iterator functionality and/or persisting readsets for validation ## Testing performed to validate your change Added unit tests for basic multiversion store --- store/multiversion/store.go | 120 +++++++++++++++++++++++++++++++ store/multiversion/store_test.go | 54 ++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 store/multiversion/store.go create mode 100644 store/multiversion/store_test.go diff --git 
a/store/multiversion/store.go b/store/multiversion/store.go new file mode 100644 index 000000000..b52c6af1a --- /dev/null +++ b/store/multiversion/store.go @@ -0,0 +1,120 @@ +package multiversion + +import ( + "sync" +) + +type MultiVersionStore interface { + GetLatest(key []byte) (value MultiVersionValueItem) + GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) + Set(index int, incarnation int, key []byte, value []byte) + SetEstimate(index int, incarnation int, key []byte) + Delete(index int, incarnation int, key []byte) + Has(index int, key []byte) bool + // TODO: do we want to add helper functions for validations with readsets / applying writesets ? +} + +type Store struct { + mtx sync.RWMutex + // map that stores the key -> MultiVersionValue mapping for accessing from a given key + multiVersionMap map[string]MultiVersionValue + // TODO: do we need to add something here to persist readsets for later validation + // TODO: we need to support iterators as well similar to how cachekv does it + // TODO: do we need secondary indexing on index -> keys - this way if we need to abort we can replace those keys with ESTIMATE values? - maybe this just means storing writeset +} + +func NewMultiVersionStore() *Store { + return &Store{ + multiVersionMap: make(map[string]MultiVersionValue), + } +} + +// GetLatest implements MultiVersionStore. +func (s *Store) GetLatest(key []byte) (value MultiVersionValueItem) { + s.mtx.RLock() + defer s.mtx.RUnlock() + + keyString := string(key) + // if the key doesn't exist in the overall map, return nil + if _, ok := s.multiVersionMap[keyString]; !ok { + return nil + } + val, found := s.multiVersionMap[keyString].GetLatest() + if !found { + return nil // this shouldn't be possible + } + return val +} + +// GetLatestBeforeIndex implements MultiVersionStore. +func (s *Store) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) { + s.mtx.RLock() + defer s.mtx.RUnlock() + + keyString := string(key) + // if the key doesn't exist in the overall map, return nil + if _, ok := s.multiVersionMap[keyString]; !ok { + return nil + } + val, found := s.multiVersionMap[keyString].GetLatestBeforeIndex(index) + // otherwise, we may have found a value for that key, but its not written before the index passed in + if !found { + return nil + } + // found a value prior to the passed in index, return that value (could be estimate OR deleted, but it is a definitive value) + return val +} + +// Has implements MultiVersionStore. It checks if the key exists in the multiversion store at or before the specified index. +func (s *Store) Has(index int, key []byte) bool { + s.mtx.RLock() + defer s.mtx.RUnlock() + + keyString := string(key) + if _, ok := s.multiVersionMap[keyString]; !ok { + return false // this is okay because the caller of this will THEN need to access the parent store to verify that the key doesnt exist there + } + _, found := s.multiVersionMap[keyString].GetLatestBeforeIndex(index) + return found +} + +// This function will try to intialize the multiversion item if it doesn't exist for a key specified by byte array +// NOTE: this should be used within an acquired mutex lock +func (s *Store) tryInitMultiVersionItem(keyString string) { + if _, ok := s.multiVersionMap[keyString]; !ok { + multiVersionValue := NewMultiVersionItem() + s.multiVersionMap[keyString] = multiVersionValue + } +} + +// Set implements MultiVersionStore. 
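+// For example (hypothetical caller, for illustration), a scheduler applying the write of
+// transaction index 1, incarnation 0, might call:
+//
+//	s.Set(1, 0, []byte("key1"), []byte("value1"))
+//
+// making "value1" visible to transactions with a higher index via GetLatestBeforeIndex.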
+func (s *Store) Set(index int, incarnation int, key []byte, value []byte) { + s.mtx.Lock() + defer s.mtx.Unlock() + + keyString := string(key) + s.tryInitMultiVersionItem(keyString) + s.multiVersionMap[keyString].Set(index, incarnation, value) +} + +// SetEstimate implements MultiVersionStore. +func (s *Store) SetEstimate(index int, incarnation int, key []byte) { + s.mtx.Lock() + defer s.mtx.Unlock() + + keyString := string(key) + s.tryInitMultiVersionItem(keyString) + s.multiVersionMap[keyString].SetEstimate(index, incarnation) +} + +// Delete implements MultiVersionStore. +func (s *Store) Delete(index int, incarnation int, key []byte) { + s.mtx.Lock() + defer s.mtx.Unlock() + + keyString := string(key) + s.tryInitMultiVersionItem(keyString) + s.multiVersionMap[keyString].Delete(index, incarnation) +} + +var _ MultiVersionStore = (*Store)(nil) diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go new file mode 100644 index 000000000..91465c435 --- /dev/null +++ b/store/multiversion/store_test.go @@ -0,0 +1,54 @@ +package multiversion_test + +import ( + "testing" + + "github.com/cosmos/cosmos-sdk/store/multiversion" + "github.com/stretchr/testify/require" +) + +func TestMultiVersionStore(t *testing.T) { + store := multiversion.NewMultiVersionStore() + + // Test Set and GetLatest + store.Set(1, 1, []byte("key1"), []byte("value1")) + store.Set(2, 1, []byte("key1"), []byte("value2")) + store.Set(3, 1, []byte("key2"), []byte("value3")) + require.Equal(t, []byte("value2"), store.GetLatest([]byte("key1")).Value()) + require.Equal(t, []byte("value3"), store.GetLatest([]byte("key2")).Value()) + + // Test SetEstimate + store.SetEstimate(4, 1, []byte("key1")) + require.True(t, store.GetLatest([]byte("key1")).IsEstimate()) + + // Test Delete + store.Delete(5, 1, []byte("key1")) + require.True(t, store.GetLatest([]byte("key1")).IsDeleted()) + + // Test GetLatestBeforeIndex + store.Set(6, 1, []byte("key1"), []byte("value4")) + require.True(t, store.GetLatestBeforeIndex(5, []byte("key1")).IsEstimate()) + require.Equal(t, []byte("value4"), store.GetLatestBeforeIndex(7, []byte("key1")).Value()) + + // Test Has + require.True(t, store.Has(2, []byte("key1"))) + require.False(t, store.Has(0, []byte("key1"))) + require.False(t, store.Has(5, []byte("key4"))) +} + +func TestMultiVersionStoreHasLaterValue(t *testing.T) { + store := multiversion.NewMultiVersionStore() + + store.Set(5, 1, []byte("key1"), []byte("value2")) + + require.Nil(t, store.GetLatestBeforeIndex(4, []byte("key1"))) + require.Equal(t, []byte("value2"), store.GetLatestBeforeIndex(6, []byte("key1")).Value()) +} + +func TestMultiVersionStoreKeyDNE(t *testing.T) { + store := multiversion.NewMultiVersionStore() + + require.Nil(t, store.GetLatest([]byte("key1"))) + require.Nil(t, store.GetLatestBeforeIndex(0, []byte("key1"))) + require.False(t, store.Has(0, []byte("key1"))) +} From dac5f7b3448cc9d7a4264288b0da1b1bbc7dbae7 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 9 Oct 2023 10:48:27 -0400 Subject: [PATCH 05/65] [occ] Add concurrency worker configuration (#324) ## Describe your changes and provide context - `ConcurrencyWorkers` represents the number of workers to use for concurrent transactions - since concurrrency-workers is a baseapp-level setting, implementations (like sei-chain) shouldn't have to pass it (but can) - it defaults to 10 if not set (via cli default value) - it defaults to 10 in app.toml only if that file is being created (and doesn't exist) - if explicitly set to zero on command line, it 
will override with the default (for safety) - cli takes precedence over the config file - no one has to do anything to get it to be 10 (no config changes no sei-chain changes required (aside from new cosmos version)) ## Testing performed to validate your change - Unit Tests for setting the value - Manually testing scenarios with sei-chain --- baseapp/baseapp.go | 21 ++++++++++++++++++++- baseapp/options.go | 11 +++++++++++ server/config/config.go | 9 +++++++++ server/config/config_test.go | 5 +++++ server/config/toml.go | 3 +++ server/start.go | 2 ++ 6 files changed, 50 insertions(+), 1 deletion(-) diff --git a/baseapp/baseapp.go b/baseapp/baseapp.go index 57b60a289..2fd2e89f8 100644 --- a/baseapp/baseapp.go +++ b/baseapp/baseapp.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/otel/trace" "github.com/armon/go-metrics" + "github.com/cosmos/cosmos-sdk/server/config" "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/gogo/protobuf/proto" sdbm "github.com/sei-protocol/sei-tm-db/backends" @@ -60,7 +61,8 @@ const ( FlagArchivalArweaveIndexDBFullPath = "archival-arweave-index-db-full-path" FlagArchivalArweaveNodeURL = "archival-arweave-node-url" - FlagChainID = "chain-id" + FlagChainID = "chain-id" + FlagConcurrencyWorkers = "concurrency-workers" ) var ( @@ -168,6 +170,8 @@ type BaseApp struct { //nolint: maligned TmConfig *tmcfg.Config TracingInfo *tracing.Info + + concurrencyWorkers int } type appStore struct { @@ -294,6 +298,16 @@ func NewBaseApp( app.cms.(*rootmulti.Store).SetOrphanConfig(app.orphanConfig) } + // if no option overrode already, initialize to the flags value + // this avoids forcing every implementation to pass an option, but allows it + if app.concurrencyWorkers == 0 { + app.concurrencyWorkers = cast.ToInt(appOpts.Get(FlagConcurrencyWorkers)) + } + // safely default this to the default value if 0 + if app.concurrencyWorkers == 0 { + app.concurrencyWorkers = config.DefaultConcurrencyWorkers + } + return app } @@ -307,6 +321,11 @@ func (app *BaseApp) AppVersion() uint64 { return app.appVersion } +// ConcurrencyWorkers returns the number of concurrent workers for the BaseApp. +func (app *BaseApp) ConcurrencyWorkers() int { + return app.concurrencyWorkers +} + // Version returns the application's version string. func (app *BaseApp) Version() string { return app.version diff --git a/baseapp/options.go b/baseapp/options.go index 3eac7f812..1ca017bf6 100644 --- a/baseapp/options.go +++ b/baseapp/options.go @@ -87,6 +87,10 @@ func SetSnapshotInterval(interval uint64) func(*BaseApp) { return func(app *BaseApp) { app.SetSnapshotInterval(interval) } } +func SetConcurrencyWorkers(workers int) func(*BaseApp) { + return func(app *BaseApp) { app.SetConcurrencyWorkers(workers) } +} + // SetSnapshotKeepRecent sets the recent snapshots to keep. func SetSnapshotKeepRecent(keepRecent uint32) func(*BaseApp) { return func(app *BaseApp) { app.SetSnapshotKeepRecent(keepRecent) } @@ -295,6 +299,13 @@ func (app *BaseApp) SetSnapshotInterval(snapshotInterval uint64) { app.snapshotInterval = snapshotInterval } +func (app *BaseApp) SetConcurrencyWorkers(workers int) { + if app.sealed { + panic("SetConcurrencyWorkers() on sealed BaseApp") + } + app.concurrencyWorkers = workers +} + // SetSnapshotKeepRecent sets the number of recent snapshots to keep. 
func (app *BaseApp) SetSnapshotKeepRecent(snapshotKeepRecent uint32) { if app.sealed { diff --git a/server/config/config.go b/server/config/config.go index 9a794cd08..33bc3ff98 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -21,6 +21,9 @@ const ( // DefaultGRPCWebAddress defines the default address to bind the gRPC-web server to. DefaultGRPCWebAddress = "0.0.0.0:9091" + + // DefaultConcurrencyWorkers defines the default workers to use for concurrent transactions + DefaultConcurrencyWorkers = 10 ) // BaseConfig defines the server's basic configuration @@ -88,6 +91,10 @@ type BaseConfig struct { SeparateOrphanVersionsToKeep int64 `mapstructure:"separate-orphan-versions-to-keep"` NumOrphanPerFile int `mapstructure:"num-orphan-per-file"` OrphanDirectory string `mapstructure:"orphan-dir"` + + // ConcurrencyWorkers defines the number of workers to use for concurrent + // transaction execution. A value of -1 means unlimited workers. Default value is 10. + ConcurrencyWorkers int `mapstructure:"concurrency-workers"` } // APIConfig defines the API listener configuration. @@ -236,6 +243,7 @@ func DefaultConfig() *Config { IAVLDisableFastNode: true, CompactionInterval: 0, NoVersioning: false, + ConcurrencyWorkers: DefaultConcurrencyWorkers, }, Telemetry: telemetry.Config{ Enabled: false, @@ -310,6 +318,7 @@ func GetConfig(v *viper.Viper) (Config, error) { SeparateOrphanVersionsToKeep: v.GetInt64("separate-orphan-versions-to-keep"), NumOrphanPerFile: v.GetInt("num-orphan-per-file"), OrphanDirectory: v.GetString("orphan-dir"), + ConcurrencyWorkers: v.GetInt("concurrency-workers"), }, Telemetry: telemetry.Config{ ServiceName: v.GetString("telemetry.service-name"), diff --git a/server/config/config_test.go b/server/config/config_test.go index ce733c346..040bfa788 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -23,3 +23,8 @@ func TestSetSnapshotDirectory(t *testing.T) { cfg := DefaultConfig() require.Equal(t, "", cfg.StateSync.SnapshotDirectory) } + +func TestSetConcurrencyWorkers(t *testing.T) { + cfg := DefaultConfig() + require.Equal(t, DefaultConcurrencyWorkers, cfg.ConcurrencyWorkers) +} diff --git a/server/config/toml.go b/server/config/toml.go index 47571fdff..8844b04fb 100644 --- a/server/config/toml.go +++ b/server/config/toml.go @@ -101,6 +101,9 @@ num-orphan-per-file = {{ .BaseConfig.NumOrphanPerFile }} # if separate-orphan-storage is true, where to store orphan data orphan-dir = "{{ .BaseConfig.OrphanDirectory }}" +# concurrency-workers defines how many workers to run for concurrent transaction execution +# concurrency-workers = {{ .BaseConfig.ConcurrencyWorkers }} + ############################################################################### ### Telemetry Configuration ### ############################################################################### diff --git a/server/start.go b/server/start.go index 14f4e9770..aedc274e4 100644 --- a/server/start.go +++ b/server/start.go @@ -70,6 +70,7 @@ const ( FlagSeparateOrphanVersionsToKeep = "separate-orphan-versions-to-keep" FlagNumOrphanPerFile = "num-orphan-per-file" FlagOrphanDirectory = "orphan-dir" + FlagConcurrencyWorkers = "concurrency-workers" // state sync-related flags FlagStateSyncSnapshotInterval = "state-sync.snapshot-interval" @@ -252,6 +253,7 @@ is performed. Note, when enabled, gRPC will also be automatically enabled. 
cmd.Flags().Int64(FlagSeparateOrphanVersionsToKeep, 2, "Number of versions to keep if storing orphans separately") cmd.Flags().Int(FlagNumOrphanPerFile, 100000, "Number of orphans to store on each file if storing orphans separately") cmd.Flags().String(FlagOrphanDirectory, path.Join(defaultNodeHome, "orphans"), "Directory to store orphan files if storing orphans separately") + cmd.Flags().Int(FlagConcurrencyWorkers, config.DefaultConcurrencyWorkers, "Number of workers to process concurrent transactions") cmd.Flags().Bool(flagGRPCOnly, false, "Start the node in gRPC query only mode (no Tendermint process is started)") cmd.Flags().Bool(flagGRPCEnable, true, "Define if the gRPC server should be enabled") From 94bb98f7db4609482a490f6d3d98e549a8016f28 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 10 Oct 2023 08:36:35 -0500 Subject: [PATCH 06/65] [occ] Occ multiversion store (#326) ## Describe your changes and provide context This adds in functionality to write the latest multiversion values to another store (to be used for writing to parent after transaction execution), and also adds in helpers for writeset management such as setting, invalidating, and setting estimated writesets. ## Testing performed to validate your change Unit testing for added functionality --- store/multiversion/data_structures.go | 34 ++++- store/multiversion/data_structures_test.go | 28 ++++ store/multiversion/store.go | 153 +++++++++++++++++++-- store/multiversion/store_test.go | 88 ++++++++++++ 4 files changed, 292 insertions(+), 11 deletions(-) diff --git a/store/multiversion/data_structures.go b/store/multiversion/data_structures.go index c4ca7b995..cba10d0f4 100644 --- a/store/multiversion/data_structures.go +++ b/store/multiversion/data_structures.go @@ -14,10 +14,12 @@ const ( type MultiVersionValue interface { GetLatest() (value MultiVersionValueItem, found bool) + GetLatestNonEstimate() (value MultiVersionValueItem, found bool) GetLatestBeforeIndex(index int) (value MultiVersionValueItem, found bool) Set(index int, incarnation int, value []byte) SetEstimate(index int, incarnation int) Delete(index int, incarnation int) + Remove(index int) } type MultiVersionValueItem interface { @@ -42,8 +44,6 @@ func NewMultiVersionItem() *multiVersionItem { } // GetLatest returns the latest written value to the btree, and returns a boolean indicating whether it was found. -// -// A `nil` value along with `found=true` indicates a deletion that has occurred and the underlying parent store doesn't need to be hit. func (item *multiVersionItem) GetLatest() (MultiVersionValueItem, bool) { item.mtx.RLock() defer item.mtx.RUnlock() @@ -56,6 +56,29 @@ func (item *multiVersionItem) GetLatest() (MultiVersionValueItem, bool) { return valueItem, true } +// GetLatestNonEstimate returns the latest written value that isn't an ESTIMATE and returns a boolean indicating whether it was found. 
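+// It descends from the highest transaction index and skips over any ESTIMATE entries.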
+// This can be used when we want to write finalized values, since ESTIMATEs can be considered to be irrelevant at that point +func (item *multiVersionItem) GetLatestNonEstimate() (MultiVersionValueItem, bool) { + item.mtx.RLock() + defer item.mtx.RUnlock() + + var vItem *valueItem + var found bool + item.valueTree.Descend(func(bTreeItem btree.Item) bool { + // only return if non-estimate + item := bTreeItem.(*valueItem) + if item.IsEstimate() { + // if estimate, continue + return true + } + // else we want to return + vItem = item + found = true + return false + }) + return vItem, found +} + // GetLatest returns the latest written value to the btree prior to the index passed in, and returns a boolean indicating whether it was found. // // A `nil` value along with `found=true` indicates a deletion that has occurred and the underlying parent store doesn't need to be hit. @@ -95,6 +118,13 @@ func (item *multiVersionItem) Delete(index int, incarnation int) { item.valueTree.ReplaceOrInsert(deletedItem) } +func (item *multiVersionItem) Remove(index int) { + item.mtx.Lock() + defer item.mtx.Unlock() + + item.valueTree.Delete(&valueItem{index: index}) +} + func (item *multiVersionItem) SetEstimate(index int, incarnation int) { item.mtx.Lock() defer item.mtx.Unlock() diff --git a/store/multiversion/data_structures_test.go b/store/multiversion/data_structures_test.go index 31696d366..fccc26a8b 100644 --- a/store/multiversion/data_structures_test.go +++ b/store/multiversion/data_structures_test.go @@ -198,3 +198,31 @@ func TestMultiversionItemEstimate(t *testing.T) { require.True(t, found) require.Equal(t, one, value.Value()) } + +func TestMultiversionItemRemove(t *testing.T) { + mvItem := mv.NewMultiVersionItem() + + mvItem.Set(1, 0, []byte("one")) + mvItem.Set(2, 0, []byte("two")) + + mvItem.Remove(2) + value, found := mvItem.GetLatest() + require.True(t, found) + require.Equal(t, []byte("one"), value.Value()) +} + +func TestMultiversionItemGetLatestNonEstimate(t *testing.T) { + mvItem := mv.NewMultiVersionItem() + + mvItem.SetEstimate(3, 0) + + value, found := mvItem.GetLatestNonEstimate() + require.False(t, found) + require.Nil(t, value) + + mvItem.Set(1, 0, []byte("one")) + value, found = mvItem.GetLatestNonEstimate() + require.True(t, found) + require.Equal(t, []byte("one"), value.Value()) + +} diff --git a/store/multiversion/store.go b/store/multiversion/store.go index b52c6af1a..3aa4800f3 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -1,31 +1,43 @@ package multiversion import ( + "sort" "sync" + + "github.com/cosmos/cosmos-sdk/store/types" ) type MultiVersionStore interface { GetLatest(key []byte) (value MultiVersionValueItem) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) - Set(index int, incarnation int, key []byte, value []byte) - SetEstimate(index int, incarnation int, key []byte) - Delete(index int, incarnation int, key []byte) + Set(index int, incarnation int, key []byte, value []byte) // TODO: maybe we don't need these if all writes are coming from writesets + SetEstimate(index int, incarnation int, key []byte) // TODO: maybe we don't need these if all writes are coming from writesets + Delete(index int, incarnation int, key []byte) // TODO: maybe we don't need these if all writes are coming from writesets Has(index int, key []byte) bool - // TODO: do we want to add helper functions for validations with readsets / applying writesets ? 
+ WriteLatestToStore(parentStore types.KVStore) + SetWriteset(index int, incarnation int, writeset WriteSet) + InvalidateWriteset(index int, incarnation int) + SetEstimatedWriteset(index int, incarnation int, writeset WriteSet) + GetAllWritesetKeys() map[int][]string } +type WriteSet map[string][]byte + +var _ MultiVersionStore = (*Store)(nil) + type Store struct { mtx sync.RWMutex // map that stores the key -> MultiVersionValue mapping for accessing from a given key multiVersionMap map[string]MultiVersionValue - // TODO: do we need to add something here to persist readsets for later validation - // TODO: we need to support iterators as well similar to how cachekv does it - // TODO: do we need secondary indexing on index -> keys - this way if we need to abort we can replace those keys with ESTIMATE values? - maybe this just means storing writeset + // TODO: do we need to support iterators as well similar to how cachekv does it - yes + + txWritesetKeys map[int][]string // map of tx index -> writeset keys } func NewMultiVersionStore() *Store { return &Store{ multiVersionMap: make(map[string]MultiVersionValue), + txWritesetKeys: make(map[int][]string), } } @@ -41,7 +53,7 @@ func (s *Store) GetLatest(key []byte) (value MultiVersionValueItem) { } val, found := s.multiVersionMap[keyString].GetLatest() if !found { - return nil // this shouldn't be possible + return nil // this is possible IF there is are writeset that are then removed for that key } return val } @@ -97,6 +109,95 @@ func (s *Store) Set(index int, incarnation int, key []byte, value []byte) { s.multiVersionMap[keyString].Set(index, incarnation, value) } +func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { + writeset := make(map[string][]byte) + if newWriteSet != nil { + // if non-nil writeset passed in, we can use that to optimize removals + writeset = newWriteSet + } + // if there is already a writeset existing, we should remove that fully + if keys, ok := s.txWritesetKeys[index]; ok { + // we need to delete all of the keys in the writeset from the multiversion store + for _, key := range keys { + // small optimization to check if the new writeset is going to write this key, if so, we can leave it behind + if _, ok := writeset[key]; ok { + // we don't need to remove this key because it will be overwritten anyways - saves the operation of removing + rebalancing underlying btree + continue + } + // remove from the appropriate item if present in multiVersionMap + if val, ok := s.multiVersionMap[key]; ok { + val.Remove(index) + } + } + } + // unset the writesetKeys for this index + delete(s.txWritesetKeys, index) +} + +// SetWriteset sets a writeset for a transaction index, and also writes all of the multiversion items in the writeset to the multiversion store. 
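+// A nil value in the writeset records a deletion, and any keys from a previously stored
+// writeset for this index that are absent from the new writeset are removed.
+// For example (hypothetical caller, for illustration):
+//
+//	s.SetWriteset(3, 0, WriteSet{
+//		"key1": []byte("value1"), // write "value1" at tx index 3
+//		"key2": nil,              // record a deletion for "key2"
+//	})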
+func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { + s.mtx.Lock() + defer s.mtx.Unlock() + + // remove old writeset if it exists + s.removeOldWriteset(index, writeset) + + writeSetKeys := make([]string, 0, len(writeset)) + for key, value := range writeset { + writeSetKeys = append(writeSetKeys, key) + s.tryInitMultiVersionItem(key) + if value == nil { + // delete if nil value + s.multiVersionMap[key].Delete(index, incarnation) + } else { + s.multiVersionMap[key].Set(index, incarnation, value) + } + } + sort.Strings(writeSetKeys) + s.txWritesetKeys[index] = writeSetKeys +} + +// InvalidateWriteset iterates over the keys for the given index and incarnation writeset and replaces with ESTIMATEs +func (s *Store) InvalidateWriteset(index int, incarnation int) { + s.mtx.Lock() + defer s.mtx.Unlock() + + if keys, ok := s.txWritesetKeys[index]; ok { + for _, key := range keys { + // invalidate all of the writeset items - is this suboptimal? - we could potentially do concurrently if slow because locking is on an item specific level + s.tryInitMultiVersionItem(key) // this SHOULD no-op because we're invalidating existing keys + s.multiVersionMap[key].SetEstimate(index, incarnation) + } + } + // we leave the writeset in place because we'll need it for key removal later if/when we replace with a new writeset +} + +// SetEstimatedWriteset is used to directly write estimates instead of writing a writeset and later invalidating +func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset WriteSet) { + s.mtx.Lock() + defer s.mtx.Unlock() + + // remove old writeset if it exists + s.removeOldWriteset(index, writeset) + + writeSetKeys := make([]string, 0, len(writeset)) + // still need to save the writeset so we can remove the elements later: + for key := range writeset { + writeSetKeys = append(writeSetKeys, key) + s.tryInitMultiVersionItem(key) + s.multiVersionMap[key].SetEstimate(index, incarnation) + } + sort.Strings(writeSetKeys) + s.txWritesetKeys[index] = writeSetKeys +} + +// GetWritesetKeys implements MultiVersionStore. +func (s *Store) GetAllWritesetKeys() map[int][]string { + s.mtx.RLock() + defer s.mtx.RUnlock() + return s.txWritesetKeys +} + // SetEstimate implements MultiVersionStore. func (s *Store) SetEstimate(index int, incarnation int, key []byte) { s.mtx.Lock() @@ -117,4 +218,38 @@ func (s *Store) Delete(index int, incarnation int, key []byte) { s.multiVersionMap[keyString].Delete(index, incarnation) } -var _ MultiVersionStore = (*Store)(nil) +func (s *Store) WriteLatestToStore(parentStore types.KVStore) { + s.mtx.Lock() + defer s.mtx.Unlock() + + // sort the keys + keys := make([]string, 0, len(s.multiVersionMap)) + for key := range s.multiVersionMap { + keys = append(keys, key) + } + sort.Strings(keys) + + for _, key := range keys { + mvValue, found := s.multiVersionMap[key].GetLatestNonEstimate() + if !found { + // this means that at some point, there was an estimate, but we have since removed it so there isn't anything writeable at the key, so we can skip + continue + } + // we shouldn't have any ESTIMATE values when performing the write, because we read the latest non-estimate values only + if mvValue.IsEstimate() { + panic("should not have any estimate values when writing to parent store") + } + // if the value is deleted, then delete it from the parent store + if mvValue.IsDeleted() { + // We use []byte(key) instead of conv.UnsafeStrToBytes because we cannot + // be sure if the underlying store might do a save with the byteslice or + // not. 
Once we get confirmation that .Delete is guaranteed not to + // save the byteslice, then we can assume only a read-only copy is sufficient. + parentStore.Delete([]byte(key)) + continue + } + if mvValue.Value() != nil { + parentStore.Set([]byte(key), mvValue.Value()) + } + } +} diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index 91465c435..732a5a6ba 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -3,8 +3,10 @@ package multiversion_test import ( "testing" + "github.com/cosmos/cosmos-sdk/store/dbadapter" "github.com/cosmos/cosmos-sdk/store/multiversion" "github.com/stretchr/testify/require" + dbm "github.com/tendermint/tm-db" ) func TestMultiVersionStore(t *testing.T) { @@ -52,3 +54,89 @@ func TestMultiVersionStoreKeyDNE(t *testing.T) { require.Nil(t, store.GetLatestBeforeIndex(0, []byte("key1"))) require.False(t, store.Has(0, []byte("key1"))) } + +func TestMultiVersionStoreWriteToParent(t *testing.T) { + // initialize cachekv store + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore() + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + + mvs.Set(1, 1, []byte("key1"), []byte("value1")) + mvs.Set(2, 1, []byte("key1"), []byte("value2")) + mvs.Set(3, 1, []byte("key2"), []byte("value3")) + mvs.Delete(1, 1, []byte("key3")) + mvs.Delete(1, 1, []byte("key4")) + + mvs.WriteLatestToStore(parentKVStore) + + // assert state in parent store + require.Equal(t, []byte("value2"), parentKVStore.Get([]byte("key1"))) + require.Equal(t, []byte("value3"), parentKVStore.Get([]byte("key2"))) + require.False(t, parentKVStore.Has([]byte("key3"))) + require.False(t, parentKVStore.Has([]byte("key4"))) + + // verify no-op if mvs contains ESTIMATE + mvs.SetEstimate(1, 2, []byte("key5")) + mvs.WriteLatestToStore(parentKVStore) + require.False(t, parentKVStore.Has([]byte("key5"))) +} + +func TestMultiVersionStoreWritesetSetAndInvalidate(t *testing.T) { + mvs := multiversion.NewMultiVersionStore() + + writeset := make(map[string][]byte) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + + mvs.SetWriteset(1, 2, writeset) + require.Equal(t, []byte("value1"), mvs.GetLatest([]byte("key1")).Value()) + require.Equal(t, []byte("value2"), mvs.GetLatest([]byte("key2")).Value()) + require.True(t, mvs.GetLatest([]byte("key3")).IsDeleted()) + + writeset2 := make(map[string][]byte) + writeset2["key1"] = []byte("value3") + + mvs.SetWriteset(2, 1, writeset2) + require.Equal(t, []byte("value3"), mvs.GetLatest([]byte("key1")).Value()) + + // invalidate writeset1 + mvs.InvalidateWriteset(1, 2) + + // verify estimates + require.True(t, mvs.GetLatestBeforeIndex(2, []byte("key1")).IsEstimate()) + require.True(t, mvs.GetLatestBeforeIndex(2, []byte("key2")).IsEstimate()) + require.True(t, mvs.GetLatestBeforeIndex(2, []byte("key3")).IsEstimate()) + + // third writeset + writeset3 := make(map[string][]byte) + writeset3["key4"] = []byte("foo") + writeset3["key5"] = nil + + // write the writeset directly as estimate + mvs.SetEstimatedWriteset(3, 1, writeset3) + + require.True(t, mvs.GetLatest([]byte("key4")).IsEstimate()) + require.True(t, mvs.GetLatest([]byte("key5")).IsEstimate()) + + // try replacing writeset1 to verify old keys removed + writeset1_b := make(map[string][]byte) + writeset1_b["key1"] = []byte("value4") + + mvs.SetWriteset(1, 2, writeset1_b) + require.Equal(t, []byte("value4"), 
mvs.GetLatestBeforeIndex(2, []byte("key1")).Value()) + require.Nil(t, mvs.GetLatestBeforeIndex(2, []byte("key2"))) + // verify that GetLatest for key3 returns nil - because of removal from writeset + require.Nil(t, mvs.GetLatest([]byte("key3"))) + + // verify output for GetAllWritesetKeys + writesetKeys := mvs.GetAllWritesetKeys() + // we have 3 writesets + require.Equal(t, 3, len(writesetKeys)) + require.Equal(t, []string{"key1"}, writesetKeys[1]) + require.Equal(t, []string{"key1"}, writesetKeys[2]) + require.Equal(t, []string{"key4", "key5"}, writesetKeys[3]) + +} From 5f894161eb61ff0e812e504240a98e9175231a53 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 10 Oct 2023 14:51:23 -0400 Subject: [PATCH 07/65] [occ] Add batch tx delivery interface (#327) ## Describe your changes and provide context - `sei-cosmos` will receive a list of transactions, so that sei-chain does not need to hold the logic for OCC - This will make the logic easier to test, as sei-cosmos will be fairly self-contained - Types can be extended within a tx and within request/response Example interaction: ## Testing performed to validate your change - This is a skeleton for a batch interface --- baseapp/abci.go | 16 +++++++++++++++- types/tx_batch.go | 27 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 types/tx_batch.go diff --git a/baseapp/abci.go b/baseapp/abci.go index 7328f7195..ae74f852f 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -234,9 +234,23 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc }, nil } +// DeliverTxBatch executes multiple txs +// TODO: support occ logic with scheduling +func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { + // TODO: replace with actual scheduler logic + // This is stubbed so that it does something sensible + responses := make([]*sdk.DeliverTxResult, 0, len(req.TxEntries)) + for _, tx := range req.TxEntries { + responses = append(responses, &sdk.DeliverTxResult{ + Response: app.DeliverTx(ctx, tx.Request), + }) + } + return sdk.DeliverTxBatchResponse{Results: responses} +} + // DeliverTx implements the ABCI interface and executes a tx in DeliverTx mode. // State only gets persisted if all messages are valid and get executed successfully. -// Otherwise, the ResponseDeliverTx will contain releveant error information. +// Otherwise, the ResponseDeliverTx will contain relevant error information. // Regardless of tx execution outcome, the ResponseDeliverTx will contain relevant // gas execution context. // TODO: (occ) this is the function called from sei-chain to perform execution of a transaction. diff --git a/types/tx_batch.go b/types/tx_batch.go new file mode 100644 index 000000000..a54742fae --- /dev/null +++ b/types/tx_batch.go @@ -0,0 +1,27 @@ +package types + +import abci "github.com/tendermint/tendermint/abci/types" + +// DeliverTxEntry represents an individual transaction's request within a batch. +// This can be extended to include tx-level tracing or metadata +type DeliverTxEntry struct { + Request abci.RequestDeliverTx +} + +// DeliverTxBatchRequest represents a request object for a batch of transactions. +// This can be extended to include request-level tracing or metadata +type DeliverTxBatchRequest struct { + TxEntries []*DeliverTxEntry +} + +// DeliverTxResult represents an individual transaction's response within a batch. 
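+// It carries the abci.ResponseDeliverTx produced for the corresponding DeliverTxEntry.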
+// This can be extended to include tx-level tracing or metadata +type DeliverTxResult struct { + Response abci.ResponseDeliverTx +} + +// DeliverTxBatchResponse represents a response object for a batch of transactions. +// This can be extended to include response-level tracing or metadata +type DeliverTxBatchResponse struct { + Results []*DeliverTxResult +} From 571d00a849704ad1ecdae8eac082f75c2859c1f0 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 10 Oct 2023 14:30:14 -0500 Subject: [PATCH 08/65] [occ] MVKV store implementation and tests (#323) ## Describe your changes and provide context This implements an mvkv store that will manage access from a transaction execution to the underlying multiversion store and underlying parent store if the multiversion store doesn't have that key. It will first serve any reads from its own writeset and readset, but if it does have to fall through to multiversion store or parent store, it will add those values to the readset. ## Testing performed to validate your change Unit tests --- store/multiversion/mvkv.go | 268 ++++++++++++++++++++++++++++++++ store/multiversion/mvkv_test.go | 250 +++++++++++++++++++++++++++++ types/occ/scheduler.go | 20 +++ 3 files changed, 538 insertions(+) create mode 100644 store/multiversion/mvkv.go create mode 100644 store/multiversion/mvkv_test.go create mode 100644 types/occ/scheduler.go diff --git a/store/multiversion/mvkv.go b/store/multiversion/mvkv.go new file mode 100644 index 000000000..697561355 --- /dev/null +++ b/store/multiversion/mvkv.go @@ -0,0 +1,268 @@ +package multiversion + +import ( + "io" + "sort" + "sync" + "time" + + "github.com/cosmos/cosmos-sdk/store/types" + "github.com/cosmos/cosmos-sdk/telemetry" + scheduler "github.com/cosmos/cosmos-sdk/types/occ" + dbm "github.com/tendermint/tm-db" +) + +// Version Indexed Store wraps the multiversion store in a way that implements the KVStore interface, but also stores the index of the transaction, and so store actions are applied to the multiversion store using that index +type VersionIndexedStore struct { + mtx sync.Mutex + // used for tracking reads and writes for eventual validation + persistence into multi-version store + readset map[string][]byte // contains the key -> value mapping for all keys read from the store (not mvkv, underlying store) + writeset map[string][]byte // contains the key -> value mapping for all keys written to the store + // TODO: need to add iterateset here as well + + // TODO: do we need this? - I think so? 
/ maybe we just treat `nil` value in the writeset as a delete + deleted *sync.Map + // dirty keys that haven't been sorted yet for iteration + dirtySet map[string]struct{} + // used for iterators - populated at the time of iterator instantiation + // TODO: when we want to perform iteration, we need to move all the dirty keys (writeset and readset) into the sortedTree and then combine with the iterators for the underlying stores + sortedStore *dbm.MemDB // always ascending sorted + // parent stores (both multiversion and underlying parent store) + multiVersionStore MultiVersionStore + parent types.KVStore + // transaction metadata for versioned operations + transactionIndex int + incarnation int + // have abort channel here for aborting transactions + abortChannel chan scheduler.Abort +} + +var _ types.KVStore = (*VersionIndexedStore)(nil) + +func NewVersionIndexedStore(parent types.KVStore, multiVersionStore MultiVersionStore, transactionIndex, incarnation int, abortChannel chan scheduler.Abort) *VersionIndexedStore { + return &VersionIndexedStore{ + readset: make(map[string][]byte), + writeset: make(map[string][]byte), + deleted: &sync.Map{}, + dirtySet: make(map[string]struct{}), + sortedStore: dbm.NewMemDB(), + parent: parent, + multiVersionStore: multiVersionStore, + transactionIndex: transactionIndex, + incarnation: incarnation, + abortChannel: abortChannel, + } +} + +// GetReadset returns the readset +func (store *VersionIndexedStore) GetReadset() map[string][]byte { + return store.readset +} + +// GetWriteset returns the writeset +func (store *VersionIndexedStore) GetWriteset() map[string][]byte { + return store.writeset +} + +// Get implements types.KVStore. +func (store *VersionIndexedStore) Get(key []byte) []byte { + // first try to get from writeset cache, if cache miss, then try to get from multiversion store, if that misses, then get from parent store + // if the key is in the cache, return it + + // don't have RW mutex because we have to update readset + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "get") + + types.AssertValidKey(key) + strKey := string(key) + // first check the MVKV writeset, and return that value if present + cacheValue, ok := store.writeset[strKey] + if ok { + // return the value from the cache, no need to update any readset stuff + return cacheValue + } + // read the readset to see if the value exists - and return if applicable + if readsetVal, ok := store.readset[strKey]; ok { + return readsetVal + } + + // if we didn't find it, then we want to check the multivalue store + add to readset if applicable + mvsValue := store.multiVersionStore.GetLatestBeforeIndex(store.transactionIndex, key) + if mvsValue != nil { + if mvsValue.IsEstimate() { + store.abortChannel <- scheduler.NewEstimateAbort(mvsValue.Index()) + return nil + } else { + // This handles both detecting readset conflicts and updating readset if applicable + return store.parseValueAndUpdateReadset(strKey, mvsValue) + } + } + // if we didn't find it in the multiversion store, then we want to check the parent store + add to readset + parentValue := store.parent.Get(key) + store.updateReadSet(key, parentValue) + return parentValue +} + +// This functions handles reads with deleted items and values and verifies that the data is consistent to what we currently have in the readset (IF we have a readset value for that key) +func (store *VersionIndexedStore) parseValueAndUpdateReadset(strKey string, mvsValue MultiVersionValueItem) []byte { + 
value := mvsValue.Value() + if mvsValue.IsDeleted() { + value = nil + } + store.updateReadSet([]byte(strKey), value) + return value +} + +// This function iterates over the readset, validating that the values in the readset are consistent with the values in the multiversion store and underlying parent store, and returns a boolean indicating validity +func (store *VersionIndexedStore) ValidateReadset() bool { + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "validate_readset") + + // sort the readset keys - this is so we have consistent behavior when theres varying conflicts within the readset (eg. read conflict vs estimate) + readsetKeys := make([]string, 0, len(store.readset)) + for key := range store.readset { + readsetKeys = append(readsetKeys, key) + } + sort.Strings(readsetKeys) + + // iterate over readset keys and values + for _, strKey := range readsetKeys { + key := []byte(strKey) + value := store.readset[strKey] + mvsValue := store.multiVersionStore.GetLatestBeforeIndex(store.transactionIndex, key) + if mvsValue != nil { + if mvsValue.IsEstimate() { + // if we see an estimate, that means that we need to abort and rerun + store.abortChannel <- scheduler.NewEstimateAbort(mvsValue.Index()) + return false + } else { + if mvsValue.IsDeleted() { + // check for `nil` + if value != nil { + return false + } + } else { + // check for equality + if string(value) != string(mvsValue.Value()) { + return false + } + } + } + continue // value is valid, continue to next key + } + + parentValue := store.parent.Get(key) + if string(parentValue) != string(value) { + // this shouldnt happen because if we have a conflict it should always happen within multiversion store + panic("we shouldn't ever have a readset conflict in parent store") + } + // value was correct, we can continue to the next value + } + return true +} + +// Delete implements types.KVStore. +func (store *VersionIndexedStore) Delete(key []byte) { + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "delete") + + types.AssertValidKey(key) + store.setValue(key, nil, true, true) +} + +// Has implements types.KVStore. +func (store *VersionIndexedStore) Has(key []byte) bool { + // necessary locking happens within store.Get + return store.Get(key) != nil +} + +// Set implements types.KVStore. +func (store *VersionIndexedStore) Set(key []byte, value []byte) { + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "set") + + types.AssertValidKey(key) + store.setValue(key, value, false, true) +} + +// Iterator implements types.KVStore. +func (v *VersionIndexedStore) Iterator(start []byte, end []byte) dbm.Iterator { + panic("unimplemented") +} + +// ReverseIterator implements types.KVStore. +func (v *VersionIndexedStore) ReverseIterator(start []byte, end []byte) dbm.Iterator { + panic("unimplemented") +} + +// GetStoreType implements types.KVStore. +func (v *VersionIndexedStore) GetStoreType() types.StoreType { + return v.parent.GetStoreType() +} + +// CacheWrap implements types.KVStore. +func (*VersionIndexedStore) CacheWrap(storeKey types.StoreKey) types.CacheWrap { + panic("CacheWrap not supported for version indexed store") +} + +// CacheWrapWithListeners implements types.KVStore. 
+func (*VersionIndexedStore) CacheWrapWithListeners(storeKey types.StoreKey, listeners []types.WriteListener) types.CacheWrap { + panic("CacheWrapWithListeners not supported for version indexed store") +} + +// CacheWrapWithTrace implements types.KVStore. +func (*VersionIndexedStore) CacheWrapWithTrace(storeKey types.StoreKey, w io.Writer, tc types.TraceContext) types.CacheWrap { + panic("CacheWrapWithTrace not supported for version indexed store") +} + +// GetWorkingHash implements types.KVStore. +func (v *VersionIndexedStore) GetWorkingHash() ([]byte, error) { + panic("should never attempt to get working hash from version indexed store") +} + +// Only entrypoint to mutate writeset +func (store *VersionIndexedStore) setValue(key, value []byte, deleted bool, dirty bool) { + types.AssertValidKey(key) + + keyStr := string(key) + store.writeset[keyStr] = value + if deleted { + store.deleted.Store(keyStr, struct{}{}) + } else { + store.deleted.Delete(keyStr) + } + if dirty { + store.dirtySet[keyStr] = struct{}{} + } +} + +func (store *VersionIndexedStore) WriteToMultiVersionStore() { + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") + store.multiVersionStore.SetWriteset(store.transactionIndex, store.incarnation, store.writeset) +} + +func (store *VersionIndexedStore) WriteEstimatesToMultiVersionStore() { + store.mtx.Lock() + defer store.mtx.Unlock() + defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") + store.multiVersionStore.SetEstimatedWriteset(store.transactionIndex, store.incarnation, store.writeset) +} + +func (store *VersionIndexedStore) updateReadSet(key []byte, value []byte) { + // add to readset + keyStr := string(key) + store.readset[keyStr] = value + // add to dirty set + store.dirtySet[keyStr] = struct{}{} +} + +func (store *VersionIndexedStore) isDeleted(key string) bool { + _, ok := store.deleted.Load(key) + return ok +} diff --git a/store/multiversion/mvkv_test.go b/store/multiversion/mvkv_test.go new file mode 100644 index 000000000..df1692d1f --- /dev/null +++ b/store/multiversion/mvkv_test.go @@ -0,0 +1,250 @@ +package multiversion_test + +import ( + "testing" + + "github.com/cosmos/cosmos-sdk/store/cachekv" + "github.com/cosmos/cosmos-sdk/store/dbadapter" + "github.com/cosmos/cosmos-sdk/store/multiversion" + "github.com/cosmos/cosmos-sdk/store/types" + scheduler "github.com/cosmos/cosmos-sdk/types/occ" + "github.com/stretchr/testify/require" + dbm "github.com/tendermint/tm-db" +) + +func TestVersionIndexedStoreGetters(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) + + // mock a value in the parent store + parentKVStore.Set([]byte("key1"), []byte("value1")) + + // read key that doesn't exist + val := vis.Get([]byte("key2")) + require.Nil(t, val) + require.False(t, vis.Has([]byte("key2"))) + + // read key that falls down to parent store + val2 := vis.Get([]byte("key1")) + require.Equal(t, []byte("value1"), val2) + require.True(t, vis.Has([]byte("key1"))) + // verify value now in readset + require.Equal(t, []byte("value1"), vis.GetReadset()["key1"]) + + // read the same key that should now be served from the readset (can be verified by setting a different value for the key in the parent store) + 
parentKVStore.Set([]byte("key1"), []byte("value2")) // realistically shouldn't happen, modifying to verify readset access + val3 := vis.Get([]byte("key1")) + require.True(t, vis.Has([]byte("key1"))) + require.Equal(t, []byte("value1"), val3) + + // test deleted value written to MVS but not parent store + mvs.Delete(0, 2, []byte("delKey")) + parentKVStore.Set([]byte("delKey"), []byte("value4")) + valDel := vis.Get([]byte("delKey")) + require.Nil(t, valDel) + require.False(t, vis.Has([]byte("delKey"))) + + // set different key in MVS - for various indices + mvs.Set(0, 2, []byte("key3"), []byte("value3")) + mvs.Set(2, 1, []byte("key3"), []byte("value4")) + mvs.SetEstimate(5, 0, []byte("key3")) + + // read the key that falls down to MVS + val4 := vis.Get([]byte("key3")) + // should equal value3 because value4 is later than the key in question + require.Equal(t, []byte("value3"), val4) + require.True(t, vis.Has([]byte("key3"))) + + // try a read that falls through to MVS with a later tx index + vis2 := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 3, 2, make(chan scheduler.Abort)) + val5 := vis2.Get([]byte("key3")) + // should equal value3 because value4 is later than the key in question + require.Equal(t, []byte("value4"), val5) + require.True(t, vis2.Has([]byte("key3"))) + + // test estimate values writing to abortChannel + abortChannel := make(chan scheduler.Abort) + vis3 := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 6, 2, abortChannel) + go func() { + vis3.Get([]byte("key3")) + }() + abort := <-abortChannel // read the abort from the channel + require.Equal(t, 5, abort.DependentTxIdx) + require.Equal(t, scheduler.ErrReadEstimate, abort.Err) + + vis.Set([]byte("key4"), []byte("value4")) + // verify proper response for GET + val6 := vis.Get([]byte("key4")) + require.True(t, vis.Has([]byte("key4"))) + require.Equal(t, []byte("value4"), val6) + // verify that its in the writeset + require.Equal(t, []byte("value4"), vis.GetWriteset()["key4"]) + // verify that its not in the readset + require.Nil(t, vis.GetReadset()["key4"]) +} + +func TestVersionIndexedStoreSetters(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) + + // test simple set + vis.Set([]byte("key1"), []byte("value1")) + require.Equal(t, []byte("value1"), vis.GetWriteset()["key1"]) + + mvs.Set(0, 1, []byte("key2"), []byte("value2")) + vis.Delete([]byte("key2")) + require.Nil(t, vis.Get([]byte("key2"))) + // because the delete should be at the writeset level, we should not have populated the readset + require.Zero(t, len(vis.GetReadset())) + + // try setting the value again, and then read + vis.Set([]byte("key2"), []byte("value3")) + require.Equal(t, []byte("value3"), vis.Get([]byte("key2"))) + require.Zero(t, len(vis.GetReadset())) +} + +func TestVersionIndexedStoreBoilerplateFunctions(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) + + // asserts panics where appropriate + require.Panics(t, func() { vis.CacheWrap(types.NewKVStoreKey("mock")) }) + require.Panics(t, func() { 
vis.CacheWrapWithListeners(types.NewKVStoreKey("mock"), nil) }) + require.Panics(t, func() { vis.CacheWrapWithTrace(types.NewKVStoreKey("mock"), nil, nil) }) + require.Panics(t, func() { vis.GetWorkingHash() }) + + // assert properly returns store type + require.Equal(t, types.StoreTypeDB, vis.GetStoreType()) +} + +func TestVersionIndexedStoreWrite(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) + + mvs.Set(0, 1, []byte("key3"), []byte("value3")) + + require.False(t, mvs.Has(3, []byte("key1"))) + require.False(t, mvs.Has(3, []byte("key2"))) + require.True(t, mvs.Has(3, []byte("key3"))) + + // write some keys + vis.Set([]byte("key1"), []byte("value1")) + vis.Set([]byte("key2"), []byte("value2")) + vis.Delete([]byte("key3")) + + vis.WriteToMultiVersionStore() + + require.Equal(t, []byte("value1"), mvs.GetLatest([]byte("key1")).Value()) + require.Equal(t, []byte("value2"), mvs.GetLatest([]byte("key2")).Value()) + require.True(t, mvs.GetLatest([]byte("key3")).IsDeleted()) +} + +func TestVersionIndexedStoreWriteEstimates(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) + + mvs.Set(0, 1, []byte("key3"), []byte("value3")) + + require.False(t, mvs.Has(3, []byte("key1"))) + require.False(t, mvs.Has(3, []byte("key2"))) + require.True(t, mvs.Has(3, []byte("key3"))) + + // write some keys + vis.Set([]byte("key1"), []byte("value1")) + vis.Set([]byte("key2"), []byte("value2")) + vis.Delete([]byte("key3")) + + vis.WriteEstimatesToMultiVersionStore() + + require.True(t, mvs.GetLatest([]byte("key1")).IsEstimate()) + require.True(t, mvs.GetLatest([]byte("key2")).IsEstimate()) + require.True(t, mvs.GetLatest([]byte("key3")).IsEstimate()) +} + +func TestVersionIndexedStoreValidation(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore() + // initialize a new VersionIndexedStore + abortC := make(chan scheduler.Abort) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 2, 2, abortC) + // set some initial values + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + parentKVStore.Set([]byte("deletedKey"), []byte("foo")) + mvs.Set(0, 1, []byte("key1"), []byte("value1")) + mvs.Set(0, 1, []byte("key2"), []byte("value2")) + mvs.Delete(0, 1, []byte("deletedKey")) + + // load those into readset + vis.Get([]byte("key1")) + vis.Get([]byte("key2")) + vis.Get([]byte("key4")) + vis.Get([]byte("key5")) + vis.Get([]byte("keyDNE")) + vis.Get([]byte("deletedKey")) + + // everything checks out, so we should be able to validate successfully + require.True(t, vis.ValidateReadset()) + // modify underlying transaction key that is unrelated + mvs.Set(1, 1, []byte("key3"), []byte("value3")) + // should still have valid readset + require.True(t, vis.ValidateReadset()) + + // modify underlying transaction key that is related + mvs.Set(1, 1, []byte("key1"), []byte("value1_b")) + // should now have invalid 
readset + require.False(t, vis.ValidateReadset()) + // reset so readset is valid again + mvs.Set(1, 1, []byte("key1"), []byte("value1")) + require.True(t, vis.ValidateReadset()) + + // mvs has a value that was initially read from parent + mvs.Set(1, 2, []byte("key4"), []byte("value4_b")) + require.False(t, vis.ValidateReadset()) + // reset key + mvs.Set(1, 2, []byte("key4"), []byte("value4")) + require.True(t, vis.ValidateReadset()) + + // mvs has a value that was initially read from parent - BUT in a later tx index + mvs.Set(4, 2, []byte("key4"), []byte("value4_c")) + // readset should remain valid + require.True(t, vis.ValidateReadset()) + + // mvs has an estimate + mvs.SetEstimate(1, 2, []byte("key2")) + // readset should be invalid now - but via abort channel write + go func() { + vis.ValidateReadset() + }() + abort := <-abortC // read the abort from the channel + require.Equal(t, 1, abort.DependentTxIdx) + + // test key deleted later + mvs.Delete(1, 1, []byte("key2")) + require.False(t, vis.ValidateReadset()) + // reset key2 + mvs.Set(1, 1, []byte("key2"), []byte("value2")) + + // lastly verify panic if parent kvstore has a conflict - this shouldn't happen but lets assert that it would panic + parentKVStore.Set([]byte("keyDNE"), []byte("foobar")) + require.Equal(t, []byte("foobar"), parentKVStore.Get([]byte("keyDNE"))) + require.Panics(t, func() { + vis.ValidateReadset() + }) +} diff --git a/types/occ/scheduler.go b/types/occ/scheduler.go new file mode 100644 index 000000000..3905be395 --- /dev/null +++ b/types/occ/scheduler.go @@ -0,0 +1,20 @@ +package scheduler + +import "errors" + +var ( + ErrReadEstimate = errors.New("multiversion store value contains estimate, cannot read, aborting") +) + +// define the return struct for abort due to conflict +type Abort struct { + DependentTxIdx int + Err error +} + +func NewEstimateAbort(dependentTxIdx int) Abort { + return Abort{ + DependentTxIdx: dependentTxIdx, + Err: ErrReadEstimate, + } +} From 9886602f56661d9cadd6a5a20c51107c27ca7ea1 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Fri, 13 Oct 2023 09:42:46 -0500 Subject: [PATCH 09/65] [occ] Add validation function for transaction state to multiversionstore (#330) ## Describe your changes and provide context This adds in validation for transaction state to multiversion store, and implements readset validation for it as well. 
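To make the approach concrete for reviewers, here is a minimal, self-contained sketch of the readset-validation idea (hypothetical `versionedValue`/`validateReadset` names, not the store's actual API; deletion is modeled as a `nil` value). Each value a transaction recorded in its readset is re-checked against the latest write visible below that transaction's index: estimates and mismatches are reported as conflicts keyed by the writing transaction's index, while reads that originally fell through to the parent store are only sanity-checked, since the parent is assumed not to change during block execution.

```go
package main

import (
	"bytes"
	"fmt"
	"sort"
)

// versionedValue is a simplified stand-in for a multiversion entry.
type versionedValue struct {
	index    int    // index of the transaction that wrote it
	value    []byte // nil models a delete
	estimate bool   // true if the entry is an ESTIMATE
}

// latestBefore returns the newest entry written by a transaction with index < txIndex.
func latestBefore(entries []versionedValue, txIndex int) *versionedValue {
	var latest *versionedValue
	for i := range entries {
		if entries[i].index < txIndex && (latest == nil || entries[i].index > latest.index) {
			latest = &entries[i]
		}
	}
	return latest
}

// validateReadset re-checks each recorded read against the latest visible write and
// returns the sorted indices of conflicting transactions (empty means the readset is valid).
func validateReadset(store map[string][]versionedValue, parent map[string][]byte, readset map[string][]byte, txIndex int) []int {
	conflicts := map[int]struct{}{}
	for key, readVal := range readset {
		latest := latestBefore(store[key], txIndex)
		if latest == nil {
			// nothing written below this index: the read must still match the parent store
			if !bytes.Equal(parent[key], readVal) {
				panic("parent store changed mid-block; this should be impossible")
			}
			continue
		}
		if latest.estimate || !bytes.Equal(latest.value, readVal) {
			conflicts[latest.index] = struct{}{}
		}
	}
	out := make([]int, 0, len(conflicts))
	for idx := range conflicts {
		out = append(out, idx)
	}
	sort.Ints(out)
	return out
}

func main() {
	store := map[string][]versionedValue{
		"key1": {{index: 1, value: []byte("value1")}, {index: 3, value: []byte("value1_b")}},
	}
	readset := map[string][]byte{"key1": []byte("value1")}
	// tx 5 read "value1", but tx 3 has since written "value1_b" -> conflict with index 3
	fmt.Println(validateReadset(store, nil, readset, 5)) // prints [3]
}
```

Returning the sorted conflict indices rather than a plain boolean is what lets a scheduler decide which earlier transactions to wait on before re-executing.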
## Testing performed to validate your change Unit Test --- store/multiversion/mvkv_test.go | 98 +++++++++++++++++------ store/multiversion/store.go | 98 +++++++++++++++-------- store/multiversion/store_test.go | 129 ++++++++++++++++++++++++++----- 3 files changed, 249 insertions(+), 76 deletions(-) diff --git a/store/multiversion/mvkv_test.go b/store/multiversion/mvkv_test.go index df1692d1f..e17cba65c 100644 --- a/store/multiversion/mvkv_test.go +++ b/store/multiversion/mvkv_test.go @@ -15,7 +15,7 @@ import ( func TestVersionIndexedStoreGetters(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) @@ -41,16 +41,25 @@ func TestVersionIndexedStoreGetters(t *testing.T) { require.Equal(t, []byte("value1"), val3) // test deleted value written to MVS but not parent store - mvs.Delete(0, 2, []byte("delKey")) + mvs.SetWriteset(0, 2, map[string][]byte{ + "delKey": nil, + }) parentKVStore.Set([]byte("delKey"), []byte("value4")) valDel := vis.Get([]byte("delKey")) require.Nil(t, valDel) require.False(t, vis.Has([]byte("delKey"))) // set different key in MVS - for various indices - mvs.Set(0, 2, []byte("key3"), []byte("value3")) - mvs.Set(2, 1, []byte("key3"), []byte("value4")) - mvs.SetEstimate(5, 0, []byte("key3")) + mvs.SetWriteset(0, 2, map[string][]byte{ + "delKey": nil, + "key3": []byte("value3"), + }) + mvs.SetWriteset(2, 1, map[string][]byte{ + "key3": []byte("value4"), + }) + mvs.SetEstimatedWriteset(5, 0, map[string][]byte{ + "key3": nil, + }) // read the key that falls down to MVS val4 := vis.Get([]byte("key3")) @@ -89,7 +98,7 @@ func TestVersionIndexedStoreGetters(t *testing.T) { func TestVersionIndexedStoreSetters(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) @@ -97,7 +106,9 @@ func TestVersionIndexedStoreSetters(t *testing.T) { vis.Set([]byte("key1"), []byte("value1")) require.Equal(t, []byte("value1"), vis.GetWriteset()["key1"]) - mvs.Set(0, 1, []byte("key2"), []byte("value2")) + mvs.SetWriteset(0, 1, map[string][]byte{ + "key2": []byte("value2"), + }) vis.Delete([]byte("key2")) require.Nil(t, vis.Get([]byte("key2"))) // because the delete should be at the writeset level, we should not have populated the readset @@ -112,7 +123,7 @@ func TestVersionIndexedStoreSetters(t *testing.T) { func TestVersionIndexedStoreBoilerplateFunctions(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) @@ -129,11 +140,13 @@ func TestVersionIndexedStoreBoilerplateFunctions(t *testing.T) { func TestVersionIndexedStoreWrite(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs 
:= multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) - mvs.Set(0, 1, []byte("key3"), []byte("value3")) + mvs.SetWriteset(0, 1, map[string][]byte{ + "key3": []byte("value3"), + }) require.False(t, mvs.Has(3, []byte("key1"))) require.False(t, mvs.Has(3, []byte("key2"))) @@ -154,11 +167,13 @@ func TestVersionIndexedStoreWrite(t *testing.T) { func TestVersionIndexedStoreWriteEstimates(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 1, 2, make(chan scheduler.Abort)) - mvs.Set(0, 1, []byte("key3"), []byte("value3")) + mvs.SetWriteset(0, 1, map[string][]byte{ + "key3": []byte("value3"), + }) require.False(t, mvs.Has(3, []byte("key1"))) require.False(t, mvs.Has(3, []byte("key2"))) @@ -179,7 +194,7 @@ func TestVersionIndexedStoreWriteEstimates(t *testing.T) { func TestVersionIndexedStoreValidation(t *testing.T) { mem := dbadapter.Store{DB: dbm.NewMemDB()} parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) // initialize a new VersionIndexedStore abortC := make(chan scheduler.Abort) vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 2, 2, abortC) @@ -187,9 +202,12 @@ func TestVersionIndexedStoreValidation(t *testing.T) { parentKVStore.Set([]byte("key4"), []byte("value4")) parentKVStore.Set([]byte("key5"), []byte("value5")) parentKVStore.Set([]byte("deletedKey"), []byte("foo")) - mvs.Set(0, 1, []byte("key1"), []byte("value1")) - mvs.Set(0, 1, []byte("key2"), []byte("value2")) - mvs.Delete(0, 1, []byte("deletedKey")) + + mvs.SetWriteset(0, 1, map[string][]byte{ + "key1": []byte("value1"), + "key2": []byte("value2"), + "deletedKey": nil, + }) // load those into readset vis.Get([]byte("key1")) @@ -202,32 +220,52 @@ func TestVersionIndexedStoreValidation(t *testing.T) { // everything checks out, so we should be able to validate successfully require.True(t, vis.ValidateReadset()) // modify underlying transaction key that is unrelated - mvs.Set(1, 1, []byte("key3"), []byte("value3")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + }) // should still have valid readset require.True(t, vis.ValidateReadset()) // modify underlying transaction key that is related - mvs.Set(1, 1, []byte("key1"), []byte("value1_b")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1_b"), + }) // should now have invalid readset require.False(t, vis.ValidateReadset()) // reset so readset is valid again - mvs.Set(1, 1, []byte("key1"), []byte("value1")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1"), + }) require.True(t, vis.ValidateReadset()) // mvs has a value that was initially read from parent - mvs.Set(1, 2, []byte("key4"), []byte("value4_b")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1"), + "key4": []byte("value4_b"), + }) require.False(t, vis.ValidateReadset()) // reset key - mvs.Set(1, 2, []byte("key4"), []byte("value4")) + mvs.SetWriteset(1, 1, 
map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1"), + "key4": []byte("value4"), + }) require.True(t, vis.ValidateReadset()) // mvs has a value that was initially read from parent - BUT in a later tx index - mvs.Set(4, 2, []byte("key4"), []byte("value4_c")) + mvs.SetWriteset(4, 2, map[string][]byte{ + "key4": []byte("value4_c"), + }) // readset should remain valid require.True(t, vis.ValidateReadset()) // mvs has an estimate - mvs.SetEstimate(1, 2, []byte("key2")) + mvs.SetEstimatedWriteset(1, 1, map[string][]byte{ + "key2": nil, + }) // readset should be invalid now - but via abort channel write go func() { vis.ValidateReadset() @@ -236,10 +274,20 @@ func TestVersionIndexedStoreValidation(t *testing.T) { require.Equal(t, 1, abort.DependentTxIdx) // test key deleted later - mvs.Delete(1, 1, []byte("key2")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1"), + "key4": []byte("value4"), + "key2": nil, + }) require.False(t, vis.ValidateReadset()) // reset key2 - mvs.Set(1, 1, []byte("key2"), []byte("value2")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key3": []byte("value3"), + "key1": []byte("value1"), + "key4": []byte("value4"), + "key2": []byte("value2"), + }) // lastly verify panic if parent kvstore has a conflict - this shouldn't happen but lets assert that it would panic parentKVStore.Set([]byte("keyDNE"), []byte("foobar")) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 3aa4800f3..08c45204b 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -1,27 +1,31 @@ package multiversion import ( + "bytes" "sort" "sync" + "time" "github.com/cosmos/cosmos-sdk/store/types" + "github.com/cosmos/cosmos-sdk/telemetry" ) type MultiVersionStore interface { GetLatest(key []byte) (value MultiVersionValueItem) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) - Set(index int, incarnation int, key []byte, value []byte) // TODO: maybe we don't need these if all writes are coming from writesets - SetEstimate(index int, incarnation int, key []byte) // TODO: maybe we don't need these if all writes are coming from writesets - Delete(index int, incarnation int, key []byte) // TODO: maybe we don't need these if all writes are coming from writesets Has(index int, key []byte) bool - WriteLatestToStore(parentStore types.KVStore) + WriteLatestToStore() SetWriteset(index int, incarnation int, writeset WriteSet) InvalidateWriteset(index int, incarnation int) SetEstimatedWriteset(index int, incarnation int, writeset WriteSet) GetAllWritesetKeys() map[int][]string + SetReadset(index int, readset ReadSet) + GetReadset(index int) ReadSet + ValidateTransactionState(index int) []int } type WriteSet map[string][]byte +type ReadSet map[string][]byte var _ MultiVersionStore = (*Store)(nil) @@ -32,12 +36,17 @@ type Store struct { // TODO: do we need to support iterators as well similar to how cachekv does it - yes txWritesetKeys map[int][]string // map of tx index -> writeset keys + txReadSets map[int]ReadSet + + parentStore types.KVStore } -func NewMultiVersionStore() *Store { +func NewMultiVersionStore(parentStore types.KVStore) *Store { return &Store{ multiVersionMap: make(map[string]MultiVersionValue), txWritesetKeys: make(map[int][]string), + txReadSets: make(map[int]ReadSet), + parentStore: parentStore, } } @@ -99,16 +108,6 @@ func (s *Store) tryInitMultiVersionItem(keyString string) { } } -// Set implements MultiVersionStore. 
-func (s *Store) Set(index int, incarnation int, key []byte, value []byte) { - s.mtx.Lock() - defer s.mtx.Unlock() - - keyString := string(key) - s.tryInitMultiVersionItem(keyString) - s.multiVersionMap[keyString].Set(index, incarnation, value) -} - func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { writeset := make(map[string][]byte) if newWriteSet != nil { @@ -135,6 +134,7 @@ func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { } // SetWriteset sets a writeset for a transaction index, and also writes all of the multiversion items in the writeset to the multiversion store. +// TODO: returns a list of NEW keys added func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { s.mtx.Lock() defer s.mtx.Unlock() @@ -153,7 +153,7 @@ func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { s.multiVersionMap[key].Set(index, incarnation, value) } } - sort.Strings(writeSetKeys) + sort.Strings(writeSetKeys) // TODO: if we're sorting here anyways, maybe we just put it into a btree instead of a slice s.txWritesetKeys[index] = writeSetKeys } @@ -198,27 +198,63 @@ func (s *Store) GetAllWritesetKeys() map[int][]string { return s.txWritesetKeys } -// SetEstimate implements MultiVersionStore. -func (s *Store) SetEstimate(index int, incarnation int, key []byte) { +func (s *Store) SetReadset(index int, readset ReadSet) { s.mtx.Lock() defer s.mtx.Unlock() - keyString := string(key) - s.tryInitMultiVersionItem(keyString) - s.multiVersionMap[keyString].SetEstimate(index, incarnation) + s.txReadSets[index] = readset } -// Delete implements MultiVersionStore. -func (s *Store) Delete(index int, incarnation int, key []byte) { - s.mtx.Lock() - defer s.mtx.Unlock() +func (s *Store) GetReadset(index int) ReadSet { + s.mtx.RLock() + defer s.mtx.RUnlock() - keyString := string(key) - s.tryInitMultiVersionItem(keyString) - s.multiVersionMap[keyString].Delete(index, incarnation) + return s.txReadSets[index] +} + +func (s *Store) ValidateTransactionState(index int) []int { + defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") + conflictSet := map[int]struct{}{} + + // validate readset + readset := s.GetReadset(index) + // iterate over readset and check if the value is the same as the latest value relateive to txIndex in the multiversion store + for key, value := range readset { + // get the latest value from the multiversion store + latestValue := s.GetLatestBeforeIndex(index, []byte(key)) + if latestValue == nil { + // TODO: maybe we don't even do this check? 
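+			// If no transaction below this index wrote this key, the original read must have come
+			// from the parent store, which is not expected to change during block execution, so a
+			// mismatch is treated as a bug (panic) rather than a recoverable conflict.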
+ parentVal := s.parentStore.Get([]byte(key)) + if !bytes.Equal(parentVal, value) { + panic("there shouldn't be readset conflicts with parent kv store, since it shouldn't change") + } + } else { + // if estimate, mark as conflict index + if latestValue.IsEstimate() { + conflictSet[latestValue.Index()] = struct{}{} + } else if latestValue.IsDeleted() { + if value != nil { + // conflict + conflictSet[latestValue.Index()] = struct{}{} + } + } else if !bytes.Equal(latestValue.Value(), value) { + conflictSet[latestValue.Index()] = struct{}{} + } + } + } + // TODO: validate iterateset + + // convert conflictset into sorted indices + conflictIndices := make([]int, 0, len(conflictSet)) + for index := range conflictSet { + conflictIndices = append(conflictIndices, index) + } + + sort.Ints(conflictIndices) + return conflictIndices } -func (s *Store) WriteLatestToStore(parentStore types.KVStore) { +func (s *Store) WriteLatestToStore() { s.mtx.Lock() defer s.mtx.Unlock() @@ -245,11 +281,11 @@ func (s *Store) WriteLatestToStore(parentStore types.KVStore) { // be sure if the underlying store might do a save with the byteslice or // not. Once we get confirmation that .Delete is guaranteed not to // save the byteslice, then we can assume only a read-only copy is sufficient. - parentStore.Delete([]byte(key)) + s.parentStore.Delete([]byte(key)) continue } if mvValue.Value() != nil { - parentStore.Set([]byte(key), mvValue.Value()) + s.parentStore.Set([]byte(key), mvValue.Value()) } } } diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index 732a5a6ba..bb56d1e71 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -10,25 +10,38 @@ import ( ) func TestMultiVersionStore(t *testing.T) { - store := multiversion.NewMultiVersionStore() + store := multiversion.NewMultiVersionStore(nil) // Test Set and GetLatest - store.Set(1, 1, []byte("key1"), []byte("value1")) - store.Set(2, 1, []byte("key1"), []byte("value2")) - store.Set(3, 1, []byte("key2"), []byte("value3")) + store.SetWriteset(1, 1, map[string][]byte{ + "key1": []byte("value1"), + }) + store.SetWriteset(2, 1, map[string][]byte{ + "key1": []byte("value2"), + }) + store.SetWriteset(3, 1, map[string][]byte{ + "key2": []byte("value3"), + }) + require.Equal(t, []byte("value2"), store.GetLatest([]byte("key1")).Value()) require.Equal(t, []byte("value3"), store.GetLatest([]byte("key2")).Value()) // Test SetEstimate - store.SetEstimate(4, 1, []byte("key1")) + store.SetEstimatedWriteset(4, 1, map[string][]byte{ + "key1": nil, + }) require.True(t, store.GetLatest([]byte("key1")).IsEstimate()) // Test Delete - store.Delete(5, 1, []byte("key1")) + store.SetWriteset(5, 1, map[string][]byte{ + "key1": nil, + }) require.True(t, store.GetLatest([]byte("key1")).IsDeleted()) // Test GetLatestBeforeIndex - store.Set(6, 1, []byte("key1"), []byte("value4")) + store.SetWriteset(6, 1, map[string][]byte{ + "key1": []byte("value4"), + }) require.True(t, store.GetLatestBeforeIndex(5, []byte("key1")).IsEstimate()) require.Equal(t, []byte("value4"), store.GetLatestBeforeIndex(7, []byte("key1")).Value()) @@ -39,16 +52,18 @@ func TestMultiVersionStore(t *testing.T) { } func TestMultiVersionStoreHasLaterValue(t *testing.T) { - store := multiversion.NewMultiVersionStore() + store := multiversion.NewMultiVersionStore(nil) - store.Set(5, 1, []byte("key1"), []byte("value2")) + store.SetWriteset(5, 1, map[string][]byte{ + "key1": []byte("value2"), + }) require.Nil(t, store.GetLatestBeforeIndex(4, []byte("key1"))) require.Equal(t, 
[]byte("value2"), store.GetLatestBeforeIndex(6, []byte("key1")).Value()) } func TestMultiVersionStoreKeyDNE(t *testing.T) { - store := multiversion.NewMultiVersionStore() + store := multiversion.NewMultiVersionStore(nil) require.Nil(t, store.GetLatest([]byte("key1"))) require.Nil(t, store.GetLatestBeforeIndex(0, []byte("key1"))) @@ -58,18 +73,24 @@ func TestMultiVersionStoreKeyDNE(t *testing.T) { func TestMultiVersionStoreWriteToParent(t *testing.T) { // initialize cachekv store parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(parentKVStore) parentKVStore.Set([]byte("key2"), []byte("value0")) parentKVStore.Set([]byte("key4"), []byte("value4")) - mvs.Set(1, 1, []byte("key1"), []byte("value1")) - mvs.Set(2, 1, []byte("key1"), []byte("value2")) - mvs.Set(3, 1, []byte("key2"), []byte("value3")) - mvs.Delete(1, 1, []byte("key3")) - mvs.Delete(1, 1, []byte("key4")) + mvs.SetWriteset(1, 1, map[string][]byte{ + "key1": []byte("value1"), + "key3": nil, + "key4": nil, + }) + mvs.SetWriteset(2, 1, map[string][]byte{ + "key1": []byte("value2"), + }) + mvs.SetWriteset(3, 1, map[string][]byte{ + "key2": []byte("value3"), + }) - mvs.WriteLatestToStore(parentKVStore) + mvs.WriteLatestToStore() // assert state in parent store require.Equal(t, []byte("value2"), parentKVStore.Get([]byte("key1"))) @@ -78,13 +99,18 @@ func TestMultiVersionStoreWriteToParent(t *testing.T) { require.False(t, parentKVStore.Has([]byte("key4"))) // verify no-op if mvs contains ESTIMATE - mvs.SetEstimate(1, 2, []byte("key5")) - mvs.WriteLatestToStore(parentKVStore) + mvs.SetEstimatedWriteset(1, 2, map[string][]byte{ + "key1": []byte("value1"), + "key3": nil, + "key4": nil, + "key5": nil, + }) + mvs.WriteLatestToStore() require.False(t, parentKVStore.Has([]byte("key5"))) } func TestMultiVersionStoreWritesetSetAndInvalidate(t *testing.T) { - mvs := multiversion.NewMultiVersionStore() + mvs := multiversion.NewMultiVersionStore(nil) writeset := make(map[string][]byte) writeset["key1"] = []byte("value1") @@ -140,3 +166,66 @@ func TestMultiVersionStoreWritesetSetAndInvalidate(t *testing.T) { require.Equal(t, []string{"key4", "key5"}, writesetKeys[3]) } + +func TestMultiVersionStoreValidateState(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + readset["key5"] = []byte("value5") + mvs.SetReadset(5, readset) + + // assert no readset is valid + conflicts := mvs.ValidateTransactionState(4) + require.Empty(t, conflicts) + + // assert readset index 5 is valid + conflicts = mvs.ValidateTransactionState(5) + require.Empty(t, conflicts) + + // introduce conflict + mvs.SetWriteset(2, 1, map[string][]byte{ + "key3": []byte("value6"), + }) + + // expect index 2 to be returned + conflicts = mvs.ValidateTransactionState(5) + require.Equal(t, []int{2}, conflicts) + + // add a conflict due to deletion + mvs.SetWriteset(3, 
1, map[string][]byte{ + "key1": nil, + }) + + // expect indices 2 and 3 to be returned + conflicts = mvs.ValidateTransactionState(5) + require.Equal(t, []int{2, 3}, conflicts) + + // add a conflict due to estimate + mvs.SetEstimatedWriteset(4, 1, map[string][]byte{ + "key2": []byte("test"), + }) + + // expect indices 2, 3, and 4to be returned + conflicts = mvs.ValidateTransactionState(5) + require.Equal(t, []int{2, 3, 4}, conflicts) + + // assert panic for parent store mismatch + parentKVStore.Set([]byte("key5"), []byte("value6")) + require.Panics(t, func() { mvs.ValidateTransactionState(5) }) +} From 293ac79d279ffbfef505f2001a44ee087e3605e9 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 17 Oct 2023 10:20:41 -0400 Subject: [PATCH 10/65] [occ] Add basic worker task and scheduler shell (#328) ## Describe your changes and provide context - Adds a basic scheduler shell (see TODOs) - Adds a basic task definition with request/response/index - Listens to abort channel after an execution to determine conflict ## Testing performed to validate your change - Compiles (holding off until shape is validated) - Basic Unit Test for ProcessAll --- baseapp/abci.go | 25 +++- tasks/scheduler.go | 187 +++++++++++++++++++++++++++ tasks/scheduler_test.go | 59 +++++++++ types/occ/{scheduler.go => types.go} | 8 +- 4 files changed, 269 insertions(+), 10 deletions(-) create mode 100644 tasks/scheduler.go create mode 100644 tasks/scheduler_test.go rename types/occ/{scheduler.go => types.go} (76%) diff --git a/baseapp/abci.go b/baseapp/abci.go index ae74f852f..586162ca3 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/cosmos/cosmos-sdk/tasks" "os" "sort" "strings" @@ -237,13 +238,23 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc // DeliverTxBatch executes multiple txs // TODO: support occ logic with scheduling func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { - // TODO: replace with actual scheduler logic - // This is stubbed so that it does something sensible - responses := make([]*sdk.DeliverTxResult, 0, len(req.TxEntries)) + //TODO: inject multiversion store without import cycle (figure out right place for this) + // ctx = ctx.WithMultiVersionStore(multiversion.NewMultiVersionStore()) + + reqList := make([]abci.RequestDeliverTx, 0, len(req.TxEntries)) for _, tx := range req.TxEntries { - responses = append(responses, &sdk.DeliverTxResult{ - Response: app.DeliverTx(ctx, tx.Request), - }) + reqList = append(reqList, tx.Request) + } + + scheduler := tasks.NewScheduler(app.concurrencyWorkers, app.DeliverTx) + txRes, err := scheduler.ProcessAll(ctx, reqList) + if err != nil { + //TODO: handle error + } + + responses := make([]*sdk.DeliverTxResult, 0, len(req.TxEntries)) + for _, tx := range txRes { + responses = append(responses, &sdk.DeliverTxResult{Response: tx}) } return sdk.DeliverTxBatchResponse{Results: responses} } @@ -254,7 +265,7 @@ func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchReques // Regardless of tx execution outcome, the ResponseDeliverTx will contain relevant // gas execution context. // TODO: (occ) this is the function called from sei-chain to perform execution of a transaction. 
-// We'd likely replace this with an execution task that is scheduled by the OCC scheduler +// We'd likely replace this with an execution tasks that is scheduled by the OCC scheduler func (app *BaseApp) DeliverTx(ctx sdk.Context, req abci.RequestDeliverTx) (res abci.ResponseDeliverTx) { defer telemetry.MeasureSince(time.Now(), "abci", "deliver_tx") defer func() { diff --git a/tasks/scheduler.go b/tasks/scheduler.go new file mode 100644 index 000000000..c8b063fe2 --- /dev/null +++ b/tasks/scheduler.go @@ -0,0 +1,187 @@ +package tasks + +import ( + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/tendermint/tendermint/abci/types" + "golang.org/x/sync/errgroup" +) + +type status string + +const ( + // statusPending tasks are ready for execution + // all executing tasks are in pending state + statusPending status = "pending" + // statusExecuted tasks are ready for validation + // these tasks did not abort during execution + statusExecuted status = "executed" + // statusAborted means the task has been aborted + // these tasks transition to pending upon next execution + statusAborted status = "aborted" + // statusValidated means the task has been validated + // tasks in this status can be reset if an earlier task fails validation + statusValidated status = "validated" +) + +type deliverTxTask struct { + Status status + Index int + Incarnation int + Request types.RequestDeliverTx + Response *types.ResponseDeliverTx +} + +// Scheduler processes tasks concurrently +type Scheduler interface { + ProcessAll(ctx sdk.Context, reqs []types.RequestDeliverTx) ([]types.ResponseDeliverTx, error) +} + +type scheduler struct { + deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) + workers int +} + +// NewScheduler creates a new scheduler +func NewScheduler(workers int, deliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx)) Scheduler { + return &scheduler{ + workers: workers, + deliverTx: deliverTxFunc, + } +} + +func toTasks(reqs []types.RequestDeliverTx) []*deliverTxTask { + res := make([]*deliverTxTask, 0, len(reqs)) + for idx, r := range reqs { + res = append(res, &deliverTxTask{ + Request: r, + Index: idx, + Status: statusPending, + }) + } + return res +} + +func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { + res := make([]types.ResponseDeliverTx, 0, len(tasks)) + for _, t := range tasks { + res = append(res, *t.Response) + } + return res +} + +func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []types.RequestDeliverTx) ([]types.ResponseDeliverTx, error) { + tasks := toTasks(reqs) + toExecute := tasks + for len(toExecute) > 0 { + + // execute sets statuses of tasks to either executed or aborted + err := s.executeAll(ctx, toExecute) + if err != nil { + return nil, err + } + + // validate returns any that should be re-executed + // note this processes ALL tasks, not just those recently executed + toExecute, err = s.validateAll(ctx, tasks) + if err != nil { + return nil, err + } + for _, t := range toExecute { + t.Incarnation++ + t.Status = statusPending + t.Response = nil + //TODO: reset anything that needs resetting + } + } + return collectResponses(tasks), nil +} + +// TODO: validate each tasks +// TODO: return list of tasks that are invalid +func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { + var res []*deliverTxTask + + // find first non-validated entry + var startIdx int + for idx, t := range tasks { + if t.Status != statusValidated { + 
startIdx = idx + break + } + } + + for i := startIdx; i < len(tasks); i++ { + // any aborted tx is known to be suspect here + if tasks[i].Status == statusAborted { + res = append(res, tasks[i]) + } else { + //TODO: validate the tasks and add it if invalid + //TODO: create and handle abort for validation + tasks[i].Status = statusValidated + } + } + return res, nil +} + +// ExecuteAll executes all tasks concurrently +// Tasks are updated with their status +// TODO: retries on aborted tasks +// TODO: error scenarios +func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { + ch := make(chan *deliverTxTask, len(tasks)) + grp, gCtx := errgroup.WithContext(ctx.Context()) + + // a workers value < 1 means no limit + workers := s.workers + if s.workers < 1 { + workers = len(tasks) + } + + for i := 0; i < workers; i++ { + grp.Go(func() error { + for { + select { + case <-gCtx.Done(): + return gCtx.Err() + case task, ok := <-ch: + if !ok { + return nil + } + //TODO: ensure version multi store is on context + // buffered so it doesn't block on write + // abortCh := make(chan occ.Abort, 1) + + //TODO: consume from abort in non-blocking way (give it a length) + resp := s.deliverTx(ctx, task.Request) + + // close(abortCh) + + //if _, ok := <-abortCh; ok { + // tasks.status = TaskStatusAborted + // continue + //} + + task.Status = statusExecuted + task.Response = &resp + } + } + }) + } + grp.Go(func() error { + defer close(ch) + for _, task := range tasks { + select { + case <-gCtx.Done(): + return gCtx.Err() + case ch <- task: + } + } + return nil + }) + + if err := grp.Wait(); err != nil { + return err + } + + return nil +} diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go new file mode 100644 index 000000000..ba9d97846 --- /dev/null +++ b/tasks/scheduler_test.go @@ -0,0 +1,59 @@ +package tasks + +import ( + "context" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/stretchr/testify/assert" + "github.com/tendermint/tendermint/abci/types" + "testing" +) + +type mockDeliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx + +func (f mockDeliverTxFunc) DeliverTx(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + return f(ctx, req) +} + +func requestList(n int) []types.RequestDeliverTx { + tasks := make([]types.RequestDeliverTx, n) + for i := 0; i < n; i++ { + tasks[i] = types.RequestDeliverTx{} + } + return tasks +} + +func TestProcessAll(t *testing.T) { + tests := []struct { + name string + workers int + requests []types.RequestDeliverTx + deliverTxFunc mockDeliverTxFunc + expectedErr error + }{ + { + name: "All tasks processed without aborts", + workers: 2, + requests: requestList(5), + deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + return types.ResponseDeliverTx{} + }, + expectedErr: nil, + }, + //TODO: Add more test cases + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := NewScheduler(tt.workers, tt.deliverTxFunc.DeliverTx) + ctx := sdk.Context{}.WithContext(context.Background()) + + res, err := s.ProcessAll(ctx, tt.requests) + if err != tt.expectedErr { + t.Errorf("Expected error %v, got %v", tt.expectedErr, err) + } else { + // response for each request exists + assert.Len(t, res, len(tt.requests)) + } + }) + } +} diff --git a/types/occ/scheduler.go b/types/occ/types.go similarity index 76% rename from types/occ/scheduler.go rename to types/occ/types.go index 3905be395..de321b7cb 100644 --- a/types/occ/scheduler.go +++ 
b/types/occ/types.go @@ -1,12 +1,14 @@ -package scheduler +package occ -import "errors" +import ( + "errors" +) var ( ErrReadEstimate = errors.New("multiversion store value contains estimate, cannot read, aborting") ) -// define the return struct for abort due to conflict +// Abort contains the information for a transaction's conflict type Abort struct { DependentTxIdx int Err error From dfb22604c3b21409394cb9ef7b7a7a098fab3921 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 17 Oct 2023 13:15:23 -0500 Subject: [PATCH 11/65] [occ] Implement iterator for mvkv (#329) ## Describe your changes and provide context This implements Iterator and ReverseIterator for mvkv for the KVStore interface. The memiterator will be composed of versionindexedstore and multiversionstore, and will yield values in a cascading fashion firstly from the writeset, and then second from the multiversion store. This still needs optimization to persisted sorted keys instead of reconstructing sorted keys each time. ## Testing performed to validate your change Unit test to verify basic functionality --- store/multiversion/memiterator.go | 74 ++++++++ store/multiversion/mergeiterator.go | 256 ++++++++++++++++++++++++++++ store/multiversion/mvkv.go | 68 ++++++-- store/multiversion/mvkv_test.go | 74 ++++++++ 4 files changed, 457 insertions(+), 15 deletions(-) create mode 100644 store/multiversion/memiterator.go create mode 100644 store/multiversion/mergeiterator.go diff --git a/store/multiversion/memiterator.go b/store/multiversion/memiterator.go new file mode 100644 index 000000000..a3fb74323 --- /dev/null +++ b/store/multiversion/memiterator.go @@ -0,0 +1,74 @@ +package multiversion + +import ( + dbm "github.com/tendermint/tm-db" + + "github.com/cosmos/cosmos-sdk/store/types" + scheduler "github.com/cosmos/cosmos-sdk/types/occ" +) + +// Iterates over iterKVCache items. +// if key is nil, means it was deleted. +// Implements Iterator. 
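+// Values are resolved from the transaction's writeset first, then from the multiversion
+// store relative to the transaction index; reading an ESTIMATE entry sends an abort on
+// the scheduler's abort channel.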
+type memIterator struct { + types.Iterator + + mvStore MultiVersionStore + writeset map[string][]byte + index int + abortChannel chan scheduler.Abort +} + +func (store *VersionIndexedStore) newMemIterator( + start, end []byte, + items *dbm.MemDB, + ascending bool, +) *memIterator { + var iter types.Iterator + var err error + + if ascending { + iter, err = items.Iterator(start, end) + } else { + iter, err = items.ReverseIterator(start, end) + } + + if err != nil { + if iter != nil { + iter.Close() + } + panic(err) + } + + return &memIterator{ + Iterator: iter, + mvStore: store.multiVersionStore, + index: store.transactionIndex, + abortChannel: store.abortChannel, + writeset: store.GetWriteset(), + } +} + +// try to get value from the writeset, otherwise try to get from multiversion store, otherwise try to get from parent iterator +func (mi *memIterator) Value() []byte { + key := mi.Iterator.Key() + + // try fetch from writeset - return if exists + if val, ok := mi.writeset[string(key)]; ok { + return val + } + + // get the value from the multiversion store + val := mi.mvStore.GetLatestBeforeIndex(mi.index, key) + + // if we have an estiamte, write to abort channel + if val.IsEstimate() { + mi.abortChannel <- scheduler.NewEstimateAbort(val.Index()) + } + + // if we have a deleted value, return nil + if val.IsDeleted() { + return nil + } + return val.Value() +} diff --git a/store/multiversion/mergeiterator.go b/store/multiversion/mergeiterator.go new file mode 100644 index 000000000..c0a9d23ef --- /dev/null +++ b/store/multiversion/mergeiterator.go @@ -0,0 +1,256 @@ +package multiversion + +import ( + "bytes" + "errors" + + "github.com/cosmos/cosmos-sdk/store/types" +) + +// mvsMergeIterator merges a parent Iterator and a cache Iterator. +// The cache iterator may return nil keys to signal that an item +// had been deleted (but not deleted in the parent). +// If the cache iterator has the same key as the parent, the +// cache shadows (overrides) the parent. +type mvsMergeIterator struct { + parent types.Iterator + cache types.Iterator + ascending bool +} + +var _ types.Iterator = (*mvsMergeIterator)(nil) + +func NewMVSMergeIterator( + parent, cache types.Iterator, + ascending bool, +) *mvsMergeIterator { + iter := &mvsMergeIterator{ + parent: parent, + cache: cache, + ascending: ascending, + } + + return iter +} + +// Domain implements Iterator. +// It returns the union of the iter.Parent doman, and the iter.Cache domain. +// If the domains are disjoint, this includes the domain in between them as well. +func (iter *mvsMergeIterator) Domain() (start, end []byte) { + startP, endP := iter.parent.Domain() + startC, endC := iter.cache.Domain() + + if iter.compare(startP, startC) < 0 { + start = startP + } else { + start = startC + } + + if iter.compare(endP, endC) < 0 { + end = endC + } else { + end = endP + } + + return start, end +} + +// Valid implements Iterator. +func (iter *mvsMergeIterator) Valid() bool { + return iter.skipUntilExistsOrInvalid() +} + +// Next implements Iterator +func (iter *mvsMergeIterator) Next() { + iter.skipUntilExistsOrInvalid() + iter.assertValid() + + // If parent is invalid, get the next cache item. + if !iter.parent.Valid() { + iter.cache.Next() + return + } + + // If cache is invalid, get the next parent item. + if !iter.cache.Valid() { + iter.parent.Next() + return + } + + // Both are valid. Compare keys. 
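+	// Advance whichever side currently has the smaller key (both when the keys are equal),
+	// so each underlying key is visited exactly once.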
+ keyP, keyC := iter.parent.Key(), iter.cache.Key() + switch iter.compare(keyP, keyC) { + case -1: // parent < cache + iter.parent.Next() + case 0: // parent == cache + iter.parent.Next() + iter.cache.Next() + case 1: // parent > cache + iter.cache.Next() + } +} + +// Key implements Iterator +func (iter *mvsMergeIterator) Key() []byte { + iter.skipUntilExistsOrInvalid() + iter.assertValid() + + // If parent is invalid, get the cache key. + if !iter.parent.Valid() { + return iter.cache.Key() + } + + // If cache is invalid, get the parent key. + if !iter.cache.Valid() { + return iter.parent.Key() + } + + // Both are valid. Compare keys. + keyP, keyC := iter.parent.Key(), iter.cache.Key() + + cmp := iter.compare(keyP, keyC) + switch cmp { + case -1: // parent < cache + return keyP + case 0: // parent == cache + return keyP + case 1: // parent > cache + return keyC + default: + panic("invalid compare result") + } +} + +// Value implements Iterator +func (iter *mvsMergeIterator) Value() []byte { + iter.skipUntilExistsOrInvalid() + iter.assertValid() + + // If parent is invalid, get the cache value. + if !iter.parent.Valid() { + value := iter.cache.Value() + return value + } + + // If cache is invalid, get the parent value. + if !iter.cache.Valid() { + value := iter.parent.Value() + return value + } + + // Both are valid. Compare keys. + keyP, keyC := iter.parent.Key(), iter.cache.Key() + + cmp := iter.compare(keyP, keyC) + switch cmp { + case -1: // parent < cache + value := iter.parent.Value() + return value + case 0, 1: // parent >= cache + value := iter.cache.Value() + return value + default: + panic("invalid comparison result") + } +} + +// Close implements Iterator +func (iter *mvsMergeIterator) Close() error { + if err := iter.parent.Close(); err != nil { + // still want to close cache iterator regardless + iter.cache.Close() + return err + } + + return iter.cache.Close() +} + +// Error returns an error if the mvsMergeIterator is invalid defined by the +// Valid method. +func (iter *mvsMergeIterator) Error() error { + if !iter.Valid() { + return errors.New("invalid mvsMergeIterator") + } + + return nil +} + +// If not valid, panics. +// NOTE: May have side-effect of iterating over cache. +func (iter *mvsMergeIterator) assertValid() { + if err := iter.Error(); err != nil { + panic(err) + } +} + +// Like bytes.Compare but opposite if not ascending. +func (iter *mvsMergeIterator) compare(a, b []byte) int { + if iter.ascending { + return bytes.Compare(a, b) + } + + return bytes.Compare(a, b) * -1 +} + +// Skip all delete-items from the cache w/ `key < until`. After this function, +// current cache item is a non-delete-item, or `until <= key`. +// If the current cache item is not a delete item, does nothing. +// If `until` is nil, there is no limit, and cache may end up invalid. +// CONTRACT: cache is valid. +func (iter *mvsMergeIterator) skipCacheDeletes(until []byte) { + for iter.cache.Valid() && + iter.cache.Value() == nil && + (until == nil || iter.compare(iter.cache.Key(), until) < 0) { + iter.cache.Next() + } +} + +// Fast forwards cache (or parent+cache in case of deleted items) until current +// item exists, or until iterator becomes invalid. +// Returns whether the iterator is valid. +func (iter *mvsMergeIterator) skipUntilExistsOrInvalid() bool { + for { + // If parent is invalid, fast-forward cache. + if !iter.parent.Valid() { + iter.skipCacheDeletes(nil) + return iter.cache.Valid() + } + // Parent is valid. 
+ if !iter.cache.Valid() { + return true + } + // Parent is valid, cache is valid. + + // Compare parent and cache. + keyP := iter.parent.Key() + keyC := iter.cache.Key() + + switch iter.compare(keyP, keyC) { + case -1: // parent < cache. + return true + + case 0: // parent == cache. + // Skip over if cache item is a delete. + valueC := iter.cache.Value() + if valueC == nil { + iter.parent.Next() + iter.cache.Next() + + continue + } + // Cache is not a delete. + + return true // cache exists. + case 1: // cache < parent + // Skip over if cache item is a delete. + valueC := iter.cache.Value() + if valueC == nil { + iter.skipCacheDeletes(keyP) + continue + } + // Cache is not a delete. + + return true // cache exists. + } + } +} diff --git a/store/multiversion/mvkv.go b/store/multiversion/mvkv.go index 697561355..b96338c1e 100644 --- a/store/multiversion/mvkv.go +++ b/store/multiversion/mvkv.go @@ -20,8 +20,6 @@ type VersionIndexedStore struct { writeset map[string][]byte // contains the key -> value mapping for all keys written to the store // TODO: need to add iterateset here as well - // TODO: do we need this? - I think so? / maybe we just treat `nil` value in the writeset as a delete - deleted *sync.Map // dirty keys that haven't been sorted yet for iteration dirtySet map[string]struct{} // used for iterators - populated at the time of iterator instantiation @@ -43,7 +41,6 @@ func NewVersionIndexedStore(parent types.KVStore, multiVersionStore MultiVersion return &VersionIndexedStore{ readset: make(map[string][]byte), writeset: make(map[string][]byte), - deleted: &sync.Map{}, dirtySet: make(map[string]struct{}), sortedStore: dbm.NewMemDB(), parent: parent, @@ -191,12 +188,63 @@ func (store *VersionIndexedStore) Set(key []byte, value []byte) { // Iterator implements types.KVStore. func (v *VersionIndexedStore) Iterator(start []byte, end []byte) dbm.Iterator { - panic("unimplemented") + return v.iterator(start, end, true) } // ReverseIterator implements types.KVStore. func (v *VersionIndexedStore) ReverseIterator(start []byte, end []byte) dbm.Iterator { - panic("unimplemented") + return v.iterator(start, end, false) +} + +// TODO: still needs iterateset tracking +// Iterator implements types.KVStore. 
+func (store *VersionIndexedStore) iterator(start []byte, end []byte, ascending bool) dbm.Iterator { + store.mtx.Lock() + defer store.mtx.Unlock() + // TODO: ideally we persist writeset keys into a sorted btree for later use + // make a set of total keys across mvkv and mvs to iterate + keysToIterate := make(map[string]struct{}) + for key := range store.writeset { + keysToIterate[key] = struct{}{} + } + + // TODO: ideally we take advantage of mvs keys already being sorted + // get the multiversion store sorted keys + writesetMap := store.multiVersionStore.GetAllWritesetKeys() + for i := 0; i < store.transactionIndex; i++ { + // add all the writesets keys up until current index + for _, key := range writesetMap[i] { + keysToIterate[key] = struct{}{} + } + } + // TODO: ideally merge btree and mvs keys into a single sorted btree + + // TODO: this is horribly inefficient, fix this + sortedKeys := make([]string, len(keysToIterate)) + for key := range keysToIterate { + sortedKeys = append(sortedKeys, key) + } + sort.Strings(sortedKeys) + + memDB := dbm.NewMemDB() + for _, key := range sortedKeys { + memDB.Set([]byte(key), []byte{}) + } + + var parent, memIterator types.Iterator + + // make a memIterator + memIterator = store.newMemIterator(start, end, memDB, ascending) + + if ascending { + parent = store.parent.Iterator(start, end) + } else { + parent = store.parent.ReverseIterator(start, end) + } + + // mergeIterator + return NewMVSMergeIterator(parent, memIterator, ascending) + } // GetStoreType implements types.KVStore. @@ -230,11 +278,6 @@ func (store *VersionIndexedStore) setValue(key, value []byte, deleted bool, dirt keyStr := string(key) store.writeset[keyStr] = value - if deleted { - store.deleted.Store(keyStr, struct{}{}) - } else { - store.deleted.Delete(keyStr) - } if dirty { store.dirtySet[keyStr] = struct{}{} } @@ -261,8 +304,3 @@ func (store *VersionIndexedStore) updateReadSet(key []byte, value []byte) { // add to dirty set store.dirtySet[keyStr] = struct{}{} } - -func (store *VersionIndexedStore) isDeleted(key string) bool { - _, ok := store.deleted.Load(key) - return ok -} diff --git a/store/multiversion/mvkv_test.go b/store/multiversion/mvkv_test.go index e17cba65c..23bcc472a 100644 --- a/store/multiversion/mvkv_test.go +++ b/store/multiversion/mvkv_test.go @@ -296,3 +296,77 @@ func TestVersionIndexedStoreValidation(t *testing.T) { vis.ValidateReadset() }) } + +func TestIterator(t *testing.T) { + mem := dbadapter.Store{DB: dbm.NewMemDB()} + parentKVStore := cachekv.NewStore(mem, types.NewKVStoreKey("mock"), 1000) + mvs := multiversion.NewMultiVersionStore(parentKVStore) + // initialize a new VersionIndexedStore + abortC := make(chan scheduler.Abort) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 2, 2, abortC) + + // set some initial values + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + parentKVStore.Set([]byte("deletedKey"), []byte("foo")) + mvs.SetWriteset(0, 1, map[string][]byte{ + "key1": []byte("value1"), + "key2": []byte("value2"), + "deletedKey": nil, + }) + // add an estimate to MVS + mvs.SetEstimatedWriteset(3, 1, map[string][]byte{ + "key3": []byte("value1_b"), + }) + + // iterate over the keys - exclusive on key5 + iter := vis.Iterator([]byte("000"), []byte("key5")) + vals := []string{} + defer iter.Close() + for ; iter.Valid(); iter.Next() { + vals = append(vals, string(iter.Value())) + } + require.Equal(t, []string{"value1", "value2", "value4"}, vals) + iter.Close() + + // test reverse 
iteration + vals2 := []string{} + iter2 := vis.ReverseIterator([]byte("000"), []byte("key6")) + defer iter2.Close() + for ; iter2.Valid(); iter2.Next() { + vals2 = append(vals2, string(iter2.Value())) + } + // has value5 because of end being key6 + require.Equal(t, []string{"value5", "value4", "value2", "value1"}, vals2) + iter2.Close() + + // add items to writeset + vis.Set([]byte("key3"), []byte("value3")) + vis.Set([]byte("key4"), []byte("valueNew")) + + // iterate over the keys - exclusive on key5 + iter3 := vis.Iterator([]byte("000"), []byte("key5")) + vals3 := []string{} + defer iter3.Close() + for ; iter3.Valid(); iter3.Next() { + vals3 = append(vals3, string(iter3.Value())) + } + require.Equal(t, []string{"value1", "value2", "value3", "valueNew"}, vals3) + iter3.Close() + + // add an estimate to MVS + mvs.SetEstimatedWriteset(1, 1, map[string][]byte{ + "key2": []byte("value1_b"), + }) + + go func() { + // new iter + iter4 := vis.Iterator([]byte("000"), []byte("key5")) + defer iter4.Close() + for ; iter4.Valid(); iter4.Next() { + } + }() + abort := <-abortC // read the abort from the channel + require.Equal(t, 1, abort.DependentTxIdx) + +} From 663716a2e52e027d638c48e774909c4b9dcb28f3 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 17 Oct 2023 13:52:21 -0500 Subject: [PATCH 12/65] fix dependency (#334) ## Describe your changes and provide context This fixes a dependency that was refactored, and enables commit push CI for occ-main ## Testing performed to validate your change CI --- .github/workflows/test.yml | 1 + store/multiversion/memiterator.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 61fe636ac..160148f78 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,6 +6,7 @@ on: push: branches: - main + - occ-main # TODO: remove after occ work is done permissions: contents: read diff --git a/store/multiversion/memiterator.go b/store/multiversion/memiterator.go index a3fb74323..17ffdd0d6 100644 --- a/store/multiversion/memiterator.go +++ b/store/multiversion/memiterator.go @@ -4,7 +4,7 @@ import ( dbm "github.com/tendermint/tm-db" "github.com/cosmos/cosmos-sdk/store/types" - scheduler "github.com/cosmos/cosmos-sdk/types/occ" + occtypes "github.com/cosmos/cosmos-sdk/types/occ" ) // Iterates over iterKVCache items. @@ -16,7 +16,7 @@ type memIterator struct { mvStore MultiVersionStore writeset map[string][]byte index int - abortChannel chan scheduler.Abort + abortChannel chan occtypes.Abort } func (store *VersionIndexedStore) newMemIterator( @@ -63,7 +63,7 @@ func (mi *memIterator) Value() []byte { // if we have an estiamte, write to abort channel if val.IsEstimate() { - mi.abortChannel <- scheduler.NewEstimateAbort(val.Index()) + mi.abortChannel <- occtypes.NewEstimateAbort(val.Index()) } // if we have a deleted value, return nil From b34d61cfc9acab9add62e939dfd3935ef52e95bf Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Thu, 19 Oct 2023 09:51:08 -0500 Subject: [PATCH 13/65] [occ] Iterateset tracking and validation implementation (#337) ## Describe your changes and provide context This implements a tracked iterator that is used to keep track of keys that have been iterated, and to also save metadata about the iteration for LATER validation. The iterator will be replayed and if there are any new keys / any keys missing within the iteration range, it will fail validation. the actual values served by the iterator are covered by readset validation. 
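
As a rough illustration of the replay check described above (this is not the code in this patch — the real logic lives in `validateIterator` in `store/multiversion/store.go` further down, and the `trackerSketch`/`replayMatches` names here are simplified stand-ins), the idea is to compare the keys recorded during the original iteration against the keys a fresh iterator over the same range would yield now, honoring any early-stop point:

```go
package main

import "fmt"

// trackerSketch is a simplified stand-in for the iteration tracker:
// it records which keys the tx saw and where (if anywhere) it stopped early.
type trackerSketch struct {
	iteratedKeys map[string]struct{} // keys observed when the tx originally iterated
	earlyStopKey string              // last key seen if the tx stopped early ("" if it ran to the end)
}

// replayMatches re-walks the same range (replayKeys, already in iteration order)
// and fails if a key appears that the tx never saw, or if keys the tx saw are
// now missing before the point where the original iteration stopped.
func replayMatches(t trackerSketch, replayKeys []string) bool {
	expected := make(map[string]struct{}, len(t.iteratedKeys))
	for k := range t.iteratedKeys {
		expected[k] = struct{}{}
	}
	for _, k := range replayKeys {
		if len(expected) == 0 {
			return false // replay yields more keys than the original iteration saw
		}
		if _, ok := expected[k]; !ok {
			return false // a new key appeared inside the iteration range
		}
		delete(expected, k)
		if k == t.earlyStopKey {
			return true // original iteration stopped here; later keys can't matter
		}
	}
	return len(expected) == 0 // every originally-seen key must still be present
}

func main() {
	tr := trackerSketch{
		iteratedKeys: map[string]struct{}{"key1": {}, "key2": {}},
		earlyStopKey: "key2",
	}
	fmt.Println(replayMatches(tr, []string{"key1", "key2", "key3"})) // true: key3 is past the early stop
	fmt.Println(replayMatches(tr, []string{"key1", "keyX", "key2"})) // false: keyX is a new key in range
}
```

The actual implementation additionally has to bail out (via the abort channel) when it encounters an ESTIMATE during the replay, as explained next.
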
Additionally, the early stop behavior allows the iterateset to ONLY be sensitive to changes to the keys available WITHIN the iteration range. In the event that we perform iteration, and THEN write a key within the range of iteration, this will not fail iteration because we take a snapshot of the mvkv writeset at the moment of iteration, so when we replay the iterator, we populate that iterator with the writeset at that time, so we appropriately replicate the iterator behavior. In the case that we encounter an ESTIMATE, we have to terminate the iterator validation and mark it as failed because it is impossible to know whether that ESTIMATE represents a value change or a delete, since the latter, will affect the keys available for iteration. This change also implements handlers that iterators receive for updating readset and iterateset in the `mvkv` ## Testing performed to validate your change Unit tests for various iteration scenarios --- store/multiversion/memiterator.go | 52 +++- store/multiversion/mergeiterator.go | 9 +- store/multiversion/mvkv.go | 116 ++++++-- store/multiversion/mvkv_test.go | 17 ++ store/multiversion/store.go | 120 +++++++- store/multiversion/store_test.go | 392 +++++++++++++++++++++++++- store/multiversion/trackediterator.go | 57 ++++ 7 files changed, 706 insertions(+), 57 deletions(-) create mode 100644 store/multiversion/trackediterator.go diff --git a/store/multiversion/memiterator.go b/store/multiversion/memiterator.go index 17ffdd0d6..43e8e306b 100644 --- a/store/multiversion/memiterator.go +++ b/store/multiversion/memiterator.go @@ -14,15 +14,17 @@ type memIterator struct { types.Iterator mvStore MultiVersionStore - writeset map[string][]byte + writeset WriteSet index int abortChannel chan occtypes.Abort + ReadsetHandler } func (store *VersionIndexedStore) newMemIterator( start, end []byte, items *dbm.MemDB, ascending bool, + readsetHandler ReadsetHandler, ) *memIterator { var iter types.Iterator var err error @@ -41,11 +43,12 @@ func (store *VersionIndexedStore) newMemIterator( } return &memIterator{ - Iterator: iter, - mvStore: store.multiVersionStore, - index: store.transactionIndex, - abortChannel: store.abortChannel, - writeset: store.GetWriteset(), + Iterator: iter, + mvStore: store.multiVersionStore, + index: store.transactionIndex, + abortChannel: store.abortChannel, + writeset: store.GetWriteset(), + ReadsetHandler: readsetHandler, } } @@ -66,9 +69,46 @@ func (mi *memIterator) Value() []byte { mi.abortChannel <- occtypes.NewEstimateAbort(val.Index()) } + // need to update readset // if we have a deleted value, return nil if val.IsDeleted() { + defer mi.ReadsetHandler.UpdateReadSet(key, nil) return nil } + defer mi.ReadsetHandler.UpdateReadSet(key, val.Value()) return val.Value() } + +func (store *Store) newMVSValidationIterator( + index int, + start, end []byte, + items *dbm.MemDB, + ascending bool, + writeset WriteSet, + abortChannel chan occtypes.Abort, +) *memIterator { + var iter types.Iterator + var err error + + if ascending { + iter, err = items.Iterator(start, end) + } else { + iter, err = items.ReverseIterator(start, end) + } + + if err != nil { + if iter != nil { + iter.Close() + } + panic(err) + } + + return &memIterator{ + Iterator: iter, + mvStore: store, + index: index, + abortChannel: abortChannel, + ReadsetHandler: NoOpHandler{}, + writeset: writeset, + } +} diff --git a/store/multiversion/mergeiterator.go b/store/multiversion/mergeiterator.go index c0a9d23ef..3b5cee741 100644 --- a/store/multiversion/mergeiterator.go +++ 
b/store/multiversion/mergeiterator.go @@ -16,6 +16,7 @@ type mvsMergeIterator struct { parent types.Iterator cache types.Iterator ascending bool + ReadsetHandler } var _ types.Iterator = (*mvsMergeIterator)(nil) @@ -23,11 +24,13 @@ var _ types.Iterator = (*mvsMergeIterator)(nil) func NewMVSMergeIterator( parent, cache types.Iterator, ascending bool, + readsetHandler ReadsetHandler, ) *mvsMergeIterator { iter := &mvsMergeIterator{ - parent: parent, - cache: cache, - ascending: ascending, + parent: parent, + cache: cache, + ascending: ascending, + ReadsetHandler: readsetHandler, } return iter diff --git a/store/multiversion/mvkv.go b/store/multiversion/mvkv.go index b96338c1e..1b2f947c1 100644 --- a/store/multiversion/mvkv.go +++ b/store/multiversion/mvkv.go @@ -12,12 +12,70 @@ import ( dbm "github.com/tendermint/tm-db" ) +// exposes a handler for adding items to readset, useful for iterators +type ReadsetHandler interface { + UpdateReadSet(key []byte, value []byte) +} + +type NoOpHandler struct{} + +func (NoOpHandler) UpdateReadSet(key []byte, value []byte) {} + +// exposes a handler for adding items to iterateset, to be called upon iterator close +type IterateSetHandler interface { + UpdateIterateSet(iterationTracker) +} + +type iterationTracker struct { + startKey []byte // start of the iteration range + endKey []byte // end of the iteration range + earlyStopKey []byte // key that caused early stop + iteratedKeys map[string]struct{} // TODO: is a map okay because the ordering will be enforced when we replay the iterator? + ascending bool + + writeset WriteSet + + // TODO: is it possible that terimation is affected by keys later in iteration that weren't reached? eg. number of keys affecting iteration? + // TODO: i believe to get number of keys the iteration would need to be done fully so its not a concern? + + // TODO: maybe we need to store keys served from writeset for the transaction? that way if theres OTHER keys within the writeset and the iteration range, and were written to the writeset later, we can discriminate between the groups? + // keysServedFromWriteset map[string]struct{} + + // actually its simpler to just store a copy of the writeset at the time of iterator creation +} + +func NewIterationTracker(startKey, endKey []byte, ascending bool, writeset WriteSet) iterationTracker { + copyWriteset := make(WriteSet, len(writeset)) + + for key, value := range writeset { + copyWriteset[key] = value + } + + return iterationTracker{ + startKey: startKey, + endKey: endKey, + iteratedKeys: make(map[string]struct{}), + ascending: ascending, + writeset: copyWriteset, + } +} + +func (item *iterationTracker) AddKey(key []byte) { + item.iteratedKeys[string(key)] = struct{}{} +} + +func (item *iterationTracker) SetEarlyStopKey(key []byte) { + item.earlyStopKey = key +} + // Version Indexed Store wraps the multiversion store in a way that implements the KVStore interface, but also stores the index of the transaction, and so store actions are applied to the multiversion store using that index type VersionIndexedStore struct { mtx sync.Mutex // used for tracking reads and writes for eventual validation + persistence into multi-version store - readset map[string][]byte // contains the key -> value mapping for all keys read from the store (not mvkv, underlying store) - writeset map[string][]byte // contains the key -> value mapping for all keys written to the store + // TODO: does this need sync.Map? 
+ readset map[string][]byte // contains the key -> value mapping for all keys read from the store (not mvkv, underlying store) + writeset map[string][]byte // contains the key -> value mapping for all keys written to the store + iterateset Iterateset // TODO: need to add iterateset here as well // dirty keys that haven't been sorted yet for iteration @@ -36,11 +94,14 @@ type VersionIndexedStore struct { } var _ types.KVStore = (*VersionIndexedStore)(nil) +var _ ReadsetHandler = (*VersionIndexedStore)(nil) +var _ IterateSetHandler = (*VersionIndexedStore)(nil) func NewVersionIndexedStore(parent types.KVStore, multiVersionStore MultiVersionStore, transactionIndex, incarnation int, abortChannel chan scheduler.Abort) *VersionIndexedStore { return &VersionIndexedStore{ readset: make(map[string][]byte), writeset: make(map[string][]byte), + iterateset: []iterationTracker{}, dirtySet: make(map[string]struct{}), sortedStore: dbm.NewMemDB(), parent: parent, @@ -97,7 +158,7 @@ func (store *VersionIndexedStore) Get(key []byte) []byte { } // if we didn't find it in the multiversion store, then we want to check the parent store + add to readset parentValue := store.parent.Get(key) - store.updateReadSet(key, parentValue) + store.UpdateReadSet(key, parentValue) return parentValue } @@ -107,7 +168,7 @@ func (store *VersionIndexedStore) parseValueAndUpdateReadset(strKey string, mvsV if mvsValue.IsDeleted() { value = nil } - store.updateReadSet([]byte(strKey), value) + store.UpdateReadSet([]byte(strKey), value) return value } @@ -201,40 +262,22 @@ func (v *VersionIndexedStore) ReverseIterator(start []byte, end []byte) dbm.Iter func (store *VersionIndexedStore) iterator(start []byte, end []byte, ascending bool) dbm.Iterator { store.mtx.Lock() defer store.mtx.Unlock() - // TODO: ideally we persist writeset keys into a sorted btree for later use - // make a set of total keys across mvkv and mvs to iterate - keysToIterate := make(map[string]struct{}) - for key := range store.writeset { - keysToIterate[key] = struct{}{} - } + // get the sorted keys from MVS // TODO: ideally we take advantage of mvs keys already being sorted - // get the multiversion store sorted keys - writesetMap := store.multiVersionStore.GetAllWritesetKeys() - for i := 0; i < store.transactionIndex; i++ { - // add all the writesets keys up until current index - for _, key := range writesetMap[i] { - keysToIterate[key] = struct{}{} - } - } // TODO: ideally merge btree and mvs keys into a single sorted btree + memDB := store.multiVersionStore.CollectIteratorItems(store.transactionIndex) - // TODO: this is horribly inefficient, fix this - sortedKeys := make([]string, len(keysToIterate)) - for key := range keysToIterate { - sortedKeys = append(sortedKeys, key) - } - sort.Strings(sortedKeys) - - memDB := dbm.NewMemDB() - for _, key := range sortedKeys { + // TODO: ideally we persist writeset keys into a sorted btree for later use + // make a set of total keys across mvkv and mvs to iterate + for key := range store.writeset { memDB.Set([]byte(key), []byte{}) } var parent, memIterator types.Iterator // make a memIterator - memIterator = store.newMemIterator(start, end, memDB, ascending) + memIterator = store.newMemIterator(start, end, memDB, ascending, store) if ascending { parent = store.parent.Iterator(start, end) @@ -242,8 +285,13 @@ func (store *VersionIndexedStore) iterator(start []byte, end []byte, ascending b parent = store.parent.ReverseIterator(start, end) } + mergeIterator := NewMVSMergeIterator(parent, memIterator, ascending, store) + + 
iterationTracker := NewIterationTracker(start, end, ascending, store.writeset) + trackedIterator := NewTrackedIterator(mergeIterator, iterationTracker, store) + // mergeIterator - return NewMVSMergeIterator(parent, memIterator, ascending) + return trackedIterator } @@ -288,6 +336,8 @@ func (store *VersionIndexedStore) WriteToMultiVersionStore() { defer store.mtx.Unlock() defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") store.multiVersionStore.SetWriteset(store.transactionIndex, store.incarnation, store.writeset) + store.multiVersionStore.SetReadset(store.transactionIndex, store.readset) + store.multiVersionStore.SetIterateset(store.transactionIndex, store.iterateset) } func (store *VersionIndexedStore) WriteEstimatesToMultiVersionStore() { @@ -295,12 +345,18 @@ func (store *VersionIndexedStore) WriteEstimatesToMultiVersionStore() { defer store.mtx.Unlock() defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") store.multiVersionStore.SetEstimatedWriteset(store.transactionIndex, store.incarnation, store.writeset) + // TODO: do we need to write readset and iterateset in this case? I don't think so since if this is called it means we aren't doing validation } -func (store *VersionIndexedStore) updateReadSet(key []byte, value []byte) { +func (store *VersionIndexedStore) UpdateReadSet(key []byte, value []byte) { // add to readset keyStr := string(key) store.readset[keyStr] = value // add to dirty set store.dirtySet[keyStr] = struct{}{} } + +func (store *VersionIndexedStore) UpdateIterateSet(iterationTracker iterationTracker) { + // append to iterateset + store.iterateset = append(store.iterateset, iterationTracker) +} diff --git a/store/multiversion/mvkv_test.go b/store/multiversion/mvkv_test.go index 23bcc472a..44304fd50 100644 --- a/store/multiversion/mvkv_test.go +++ b/store/multiversion/mvkv_test.go @@ -321,6 +321,12 @@ func TestIterator(t *testing.T) { // iterate over the keys - exclusive on key5 iter := vis.Iterator([]byte("000"), []byte("key5")) + + // verify domain is superset + start, end := iter.Domain() + require.Equal(t, []byte("000"), start) + require.Equal(t, []byte("key5"), end) + vals := []string{} defer iter.Close() for ; iter.Valid(); iter.Next() { @@ -354,6 +360,17 @@ func TestIterator(t *testing.T) { require.Equal(t, []string{"value1", "value2", "value3", "valueNew"}, vals3) iter3.Close() + vis.Set([]byte("key6"), []byte("value6")) + // iterate over the keys, writeset being the last of the iteration range + iter4 := vis.Iterator([]byte("000"), []byte("key7")) + vals4 := []string{} + defer iter4.Close() + for ; iter4.Valid(); iter4.Next() { + vals4 = append(vals4, string(iter4.Value())) + } + require.Equal(t, []string{"value1", "value2", "value3", "valueNew", "value5", "value6"}, vals4) + iter4.Close() + // add an estimate to MVS mvs.SetEstimatedWriteset(1, 1, map[string][]byte{ "key2": []byte("value1_b"), diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 08c45204b..0d16f12d6 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -8,6 +8,8 @@ import ( "github.com/cosmos/cosmos-sdk/store/types" "github.com/cosmos/cosmos-sdk/telemetry" + occtypes "github.com/cosmos/cosmos-sdk/types/occ" + db "github.com/tendermint/tm-db" ) type MultiVersionStore interface { @@ -19,13 +21,17 @@ type MultiVersionStore interface { InvalidateWriteset(index int, incarnation int) SetEstimatedWriteset(index int, incarnation int, writeset WriteSet) GetAllWritesetKeys() map[int][]string + CollectIteratorItems(index 
int) *db.MemDB SetReadset(index int, readset ReadSet) GetReadset(index int) ReadSet - ValidateTransactionState(index int) []int + SetIterateset(index int, iterateset Iterateset) + GetIterateset(index int) Iterateset + ValidateTransactionState(index int) (bool, []int) } type WriteSet map[string][]byte type ReadSet map[string][]byte +type Iterateset []iterationTracker var _ MultiVersionStore = (*Store)(nil) @@ -37,6 +43,7 @@ type Store struct { txWritesetKeys map[int][]string // map of tx index -> writeset keys txReadSets map[int]ReadSet + txIterateSets map[int]Iterateset parentStore types.KVStore } @@ -46,6 +53,7 @@ func NewMultiVersionStore(parentStore types.KVStore) *Store { multiVersionMap: make(map[string]MultiVersionValue), txWritesetKeys: make(map[int][]string), txReadSets: make(map[int]ReadSet), + txIterateSets: make(map[int]Iterateset), parentStore: parentStore, } } @@ -212,9 +220,107 @@ func (s *Store) GetReadset(index int) ReadSet { return s.txReadSets[index] } -func (s *Store) ValidateTransactionState(index int) []int { +func (s *Store) SetIterateset(index int, iterateset Iterateset) { + s.mtx.Lock() + defer s.mtx.Unlock() + + s.txIterateSets[index] = iterateset +} + +func (s *Store) GetIterateset(index int) Iterateset { + s.mtx.RLock() + defer s.mtx.RUnlock() + + return s.txIterateSets[index] +} + +// CollectIteratorItems implements MultiVersionStore. It will return a memDB containing all of the keys present in the multiversion store within the iteration range prior to (exclusive of) the index. +func (s *Store) CollectIteratorItems(index int) *db.MemDB { + sortedItems := db.NewMemDB() + + // get all writeset keys prior to index + keys := s.GetAllWritesetKeys() + for i := 0; i < index; i++ { + indexedWriteset, ok := keys[i] + if !ok { + continue + } + // TODO: do we want to exclude keys out of the range or just let the iterator handle it? + for _, key := range indexedWriteset { + // TODO: inefficient because (logn) for each key + rebalancing? 
maybe theres a better way to add to a tree to reduce rebalancing overhead + sortedItems.Set([]byte(key), []byte{}) + } + } + return sortedItems +} + +func (s *Store) validateIterator(index int, tracker iterationTracker) bool { + // collect items from multiversion store + sortedItems := s.CollectIteratorItems(index) + // add the iterationtracker writeset keys to the sorted items + for key := range tracker.writeset { + sortedItems.Set([]byte(key), []byte{}) + } + validChannel := make(chan bool, 1) + abortChannel := make(chan occtypes.Abort, 1) + + // listen for abort while iterating + go func(iterationTracker iterationTracker, items *db.MemDB, returnChan chan bool, abortChan chan occtypes.Abort) { + var parentIter types.Iterator + expectedKeys := iterationTracker.iteratedKeys + iter := s.newMVSValidationIterator(index, iterationTracker.startKey, iterationTracker.endKey, items, iterationTracker.ascending, iterationTracker.writeset, abortChan) + if iterationTracker.ascending { + parentIter = s.parentStore.Iterator(iterationTracker.startKey, iterationTracker.endKey) + } else { + parentIter = s.parentStore.ReverseIterator(iterationTracker.startKey, iterationTracker.endKey) + } + // create a new MVSMergeiterator + mergeIterator := NewMVSMergeIterator(parentIter, iter, iterationTracker.ascending, NoOpHandler{}) + defer mergeIterator.Close() + for ; mergeIterator.Valid(); mergeIterator.Next() { + if len(expectedKeys) == 0 { + // if we have no more expected keys, then the iterator is invalid + returnChan <- false + return + } + key := mergeIterator.Key() + if _, ok := expectedKeys[string(key)]; !ok { + // if key isn't found + returnChan <- false + return + } + // remove from expected keys + delete(expectedKeys, string(key)) + + // if our iterator key was the early stop, then we can break + if bytes.Equal(key, iterationTracker.earlyStopKey) { + returnChan <- true + return + } + } + returnChan <- !(len(expectedKeys) > 0) + }(tracker, sortedItems, validChannel, abortChannel) + select { + case <-abortChannel: + // if we get an abort, then we know that the iterator is invalid + return false + case valid := <-validChannel: + return valid + } +} + +// TODO: do we want to return bool + []int where bool indicates whether it was valid and then []int indicates only ones for which we need to wait due to estimates? - yes i think so? +func (s *Store) ValidateTransactionState(index int) (bool, []int) { defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") conflictSet := map[int]struct{}{} + valid := true + + // TODO: can we parallelize for all iterators? + iterateset := s.GetIterateset(index) + for _, iterationTracker := range iterateset { + iteratorValid := s.validateIterator(index, iterationTracker) + valid = valid && iteratorValid + } // validate readset readset := s.GetReadset(index) @@ -229,20 +335,20 @@ func (s *Store) ValidateTransactionState(index int) []int { panic("there shouldn't be readset conflicts with parent kv store, since it shouldn't change") } } else { - // if estimate, mark as conflict index + // if estimate, mark as conflict index - but don't invalidate if latestValue.IsEstimate() { conflictSet[latestValue.Index()] = struct{}{} } else if latestValue.IsDeleted() { if value != nil { // conflict - conflictSet[latestValue.Index()] = struct{}{} + // TODO: would we want to return early? 
+ valid = false } } else if !bytes.Equal(latestValue.Value(), value) { - conflictSet[latestValue.Index()] = struct{}{} + valid = false } } } - // TODO: validate iterateset // convert conflictset into sorted indices conflictIndices := make([]int, 0, len(conflictSet)) @@ -251,7 +357,7 @@ func (s *Store) ValidateTransactionState(index int) []int { } sort.Ints(conflictIndices) - return conflictIndices + return valid, conflictIndices } func (s *Store) WriteLatestToStore() { diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index bb56d1e71..84e9f77ac 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -1,10 +1,12 @@ package multiversion_test import ( + "bytes" "testing" "github.com/cosmos/cosmos-sdk/store/dbadapter" "github.com/cosmos/cosmos-sdk/store/multiversion" + "github.com/cosmos/cosmos-sdk/types/occ" "github.com/stretchr/testify/require" dbm "github.com/tendermint/tm-db" ) @@ -191,11 +193,13 @@ func TestMultiVersionStoreValidateState(t *testing.T) { mvs.SetReadset(5, readset) // assert no readset is valid - conflicts := mvs.ValidateTransactionState(4) + valid, conflicts := mvs.ValidateTransactionState(4) + require.True(t, valid) require.Empty(t, conflicts) // assert readset index 5 is valid - conflicts = mvs.ValidateTransactionState(5) + valid, conflicts = mvs.ValidateTransactionState(5) + require.True(t, valid) require.Empty(t, conflicts) // introduce conflict @@ -203,29 +207,395 @@ func TestMultiVersionStoreValidateState(t *testing.T) { "key3": []byte("value6"), }) - // expect index 2 to be returned - conflicts = mvs.ValidateTransactionState(5) - require.Equal(t, []int{2}, conflicts) + // expect failure with empty conflicts + valid, conflicts = mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Empty(t, conflicts) // add a conflict due to deletion mvs.SetWriteset(3, 1, map[string][]byte{ "key1": nil, }) - // expect indices 2 and 3 to be returned - conflicts = mvs.ValidateTransactionState(5) - require.Equal(t, []int{2, 3}, conflicts) + // expect failure with empty conflicts + valid, conflicts = mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Empty(t, conflicts) // add a conflict due to estimate mvs.SetEstimatedWriteset(4, 1, map[string][]byte{ "key2": []byte("test"), }) - // expect indices 2, 3, and 4to be returned - conflicts = mvs.ValidateTransactionState(5) - require.Equal(t, []int{2, 3, 4}, conflicts) + // expect index 4 to be returned + valid, conflicts = mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Equal(t, []int{4}, conflicts) // assert panic for parent store mismatch parentKVStore.Set([]byte("key5"), []byte("value6")) require.Panics(t, func() { mvs.ValidateTransactionState(5) }) } + +func TestMVSValidationWithOnlyEstimate(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + readset["key5"] = []byte("value5") + 
mvs.SetReadset(5, readset) + + // add a conflict due to estimate + mvs.SetEstimatedWriteset(4, 1, map[string][]byte{ + "key2": []byte("test"), + }) + + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Equal(t, []int{4}, conflicts) + +} + +func TestMVSIteratorValidation(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + // test basic iteration + iter := vis.ReverseIterator([]byte("key1"), []byte("key6")) + for ; iter.Valid(); iter.Next() { + // read value + iter.Value() + } + iter.Close() + vis.WriteToMultiVersionStore() + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} + +func TestMVSIteratorValidationWithEstimate(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + iter := vis.Iterator([]byte("key1"), []byte("key6")) + for ; iter.Valid(); iter.Next() { + // read value + iter.Value() + } + iter.Close() + vis.WriteToMultiVersionStore() + + writeset2 := make(multiversion.WriteSet) + writeset2["key2"] = []byte("value2") + mvs.SetEstimatedWriteset(2, 2, writeset2) + + // should be invalid + valid, conflicts := mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Equal(t, []int{2}, conflicts) +} + +func TestMVSIteratorValidationWithKeySwitch(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + iter := vis.Iterator([]byte("key1"), []byte("key6")) + for ; iter.Valid(); iter.Next() { + // read value + iter.Value() + } + iter.Close() + vis.WriteToMultiVersionStore() + + // deletion of 2 and introduction of 3 + writeset2 := make(multiversion.WriteSet) + writeset2["key2"] = nil + writeset2["key3"] = []byte("valueX") + mvs.SetWriteset(2, 2, writeset2) + + // should be invalid + valid, conflicts := mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Empty(t, conflicts) +} 
+ +func TestMVSIteratorValidationWithKeyAdded(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + iter := vis.Iterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + // read value + iter.Value() + } + iter.Close() + vis.WriteToMultiVersionStore() + + // addition of key6 + writeset2 := make(multiversion.WriteSet) + writeset2["key6"] = []byte("value6") + mvs.SetWriteset(2, 2, writeset2) + + // should be invalid + valid, conflicts := mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Empty(t, conflicts) +} + +func TestMVSIteratorValidationWithWritesetValues(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + // set a key BEFORE iteration occurred + vis.Set([]byte("key6"), []byte("value6")) + + iter := vis.Iterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + } + iter.Close() + vis.WriteToMultiVersionStore() + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} + +func TestMVSIteratorValidationWithWritesetValuesSetAfterIteration(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + readset["key5"] = []byte("value5") + mvs.SetReadset(5, readset) + + // no key6 because the iteration was performed BEFORE the write + iter := vis.Iterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + } + iter.Close() + + // write key 6 AFTER iterator went + vis.Set([]byte("key6"), []byte("value6")) + vis.WriteToMultiVersionStore() + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} + +func 
TestMVSIteratorValidationReverse(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + readset["key5"] = []byte("value5") + mvs.SetReadset(5, readset) + + // set a key BEFORE iteration occurred + vis.Set([]byte("key6"), []byte("value6")) + + iter := vis.ReverseIterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + } + iter.Close() + vis.WriteToMultiVersionStore() + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} + +func TestMVSIteratorValidationEarlyStop(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + mvs.SetReadset(5, readset) + + iter := vis.Iterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + // read the value and see if we want to break + if bytes.Equal(iter.Key(), []byte("key4")) { + break + } + } + iter.Close() + vis.WriteToMultiVersionStore() + + // removal of key5 - but irrelevant because of early stop + writeset2 := make(multiversion.WriteSet) + writeset2["key5"] = nil + mvs.SetWriteset(2, 2, writeset2) + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} + +// TODO: what about early stop with a new key added in the range? - especially if its the last key that we stopped at? 
+func TestMVSIteratorValidationEarlyStopAtEndOfRange(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + vis := multiversion.NewVersionIndexedStore(parentKVStore, mvs, 5, 1, make(chan occ.Abort)) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) + + // test basic iteration + iter := vis.Iterator([]byte("key1"), []byte("key7")) + for ; iter.Valid(); iter.Next() { + // read the value and see if we want to break + if bytes.Equal(iter.Key(), []byte("key5")) { + break + } + } + iter.Close() + vis.WriteToMultiVersionStore() + + // add key6 + writeset2 := make(multiversion.WriteSet) + writeset2["key6"] = []byte("value6") + mvs.SetWriteset(2, 2, writeset2) + + // should be valid + valid, conflicts := mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) +} diff --git a/store/multiversion/trackediterator.go b/store/multiversion/trackediterator.go new file mode 100644 index 000000000..361d848cb --- /dev/null +++ b/store/multiversion/trackediterator.go @@ -0,0 +1,57 @@ +package multiversion + +import "github.com/cosmos/cosmos-sdk/store/types" + +// tracked iterator is a wrapper around an existing iterator to track the iterator progress and monitor which keys are iterated. +type trackedIterator struct { + types.Iterator + + iterateset iterationTracker + IterateSetHandler +} + +// TODO: test + +func NewTrackedIterator(iter types.Iterator, iterationTracker iterationTracker, iterateSetHandler IterateSetHandler) *trackedIterator { + return &trackedIterator{ + Iterator: iter, + iterateset: iterationTracker, + IterateSetHandler: iterateSetHandler, + } +} + +// Close calls first updates the iterateset from the iterator, and then calls iterator.Close() +func (ti *trackedIterator) Close() error { + // TODO: if there are more keys to the iterator, then we consider it early stopped? 
+ if ti.Iterator.Valid() { + // TODO: test whether reaching end of iteration range means valid is true or false + ti.iterateset.SetEarlyStopKey(ti.Iterator.Key()) + } + // Update iterate set + ti.IterateSetHandler.UpdateIterateSet(ti.iterateset) + return ti.Iterator.Close() +} + +// Key calls the iterator.Key() and adds the key to the iterateset, then returns the key from the iterator +func (ti *trackedIterator) Key() []byte { + key := ti.Iterator.Key() + // add key to the tracker + ti.iterateset.AddKey(key) + return key +} + +// Value calls the iterator.Key() and adds the key to the iterateset, then returns the value from the iterator +func (ti *trackedIterator) Value() []byte { + key := ti.Iterator.Key() + // add key to the tracker + ti.iterateset.AddKey(key) + return ti.Iterator.Value() +} + +func (ti *trackedIterator) Next() { + // add current key to the tracker + key := ti.Iterator.Key() + ti.iterateset.AddKey(key) + // call next + ti.Iterator.Next() +} From 0aebbc95121e77a94b32f4c62af9f9346d12b81f Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 19 Oct 2023 13:41:46 -0400 Subject: [PATCH 14/65] [occ] Add scheduler logic for validation (#336) ## Describe your changes and provide context - This was copied from #332 which became unwieldy due to commit history (merges/rebases) - Adds scheduler logic for validation - In this initial version it completes all executions then performs validations (which feed retries) - Once we start benchmarking we can make performance improvements to this - Retries tasks that fail validation and have no dependencies ## Testing performed to validate your change - Scheduler Test verifies multi-worker with conflicts --- baseapp/abci.go | 4 - baseapp/deliver_tx_batch_test.go | 131 ++++++++++++++++++++ server/mock/store.go | 8 ++ store/cachemulti/store.go | 17 +++ store/multiversion/mvkv.go | 17 +++ store/multiversion/store.go | 7 ++ store/rootmulti/store.go | 12 ++ store/types/store.go | 6 + tasks/scheduler.go | 198 +++++++++++++++++++++++++------ tasks/scheduler_test.go | 88 ++++++++++---- 10 files changed, 426 insertions(+), 62 deletions(-) create mode 100644 baseapp/deliver_tx_batch_test.go diff --git a/baseapp/abci.go b/baseapp/abci.go index 586162ca3..52661b30a 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -236,11 +236,7 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc } // DeliverTxBatch executes multiple txs -// TODO: support occ logic with scheduling func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { - //TODO: inject multiversion store without import cycle (figure out right place for this) - // ctx = ctx.WithMultiVersionStore(multiversion.NewMultiVersionStore()) - reqList := make([]abci.RequestDeliverTx, 0, len(req.TxEntries)) for _, tx := range req.TxEntries { reqList = append(reqList, tx.Request) diff --git a/baseapp/deliver_tx_batch_test.go b/baseapp/deliver_tx_batch_test.go new file mode 100644 index 000000000..13cd9fd60 --- /dev/null +++ b/baseapp/deliver_tx_batch_test.go @@ -0,0 +1,131 @@ +package baseapp + +import ( + "context" + "fmt" + "strconv" + "testing" + + "github.com/stretchr/testify/require" + abci "github.com/tendermint/tendermint/abci/types" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" + + "github.com/cosmos/cosmos-sdk/codec" + sdk "github.com/cosmos/cosmos-sdk/types" +) + +func toInt(b []byte) int { + r, _ := strconv.Atoi(string(b)) + return r +} + +func toByteArr(i int) []byte { + return 
[]byte(fmt.Sprintf("%d", i)) +} + +func handlerKVStore(capKey sdk.StoreKey) sdk.Handler { + return func(ctx sdk.Context, msg sdk.Msg) (*sdk.Result, error) { + ctx = ctx.WithEventManager(sdk.NewEventManager()) + res := &sdk.Result{} + + // Extract the unique ID from the message (assuming you have added this) + txIndex := ctx.TxIndex() + + // Use the unique ID to get a specific key for this transaction + sharedKey := []byte(fmt.Sprintf("shared")) + txKey := []byte(fmt.Sprintf("tx-%d", txIndex)) + + // Similar steps as before: Get the store, retrieve a value, increment it, store back, emit an event + // Get the store + store := ctx.KVStore(capKey) + + // increment per-tx key (no conflict) + val := toInt(store.Get(txKey)) + store.Set(txKey, toByteArr(val+1)) + + // increment shared key + sharedVal := toInt(store.Get(sharedKey)) + store.Set(sharedKey, toByteArr(sharedVal+1)) + + // Emit an event with the incremented value and the unique ID + ctx.EventManager().EmitEvent( + sdk.NewEvent(sdk.EventTypeMessage, + sdk.NewAttribute("shared-val", fmt.Sprintf("%d", sharedVal+1)), + sdk.NewAttribute("tx-val", fmt.Sprintf("%d", val+1)), + sdk.NewAttribute("tx-id", fmt.Sprintf("%d", txIndex)), + ), + ) + + res.Events = ctx.EventManager().Events().ToABCIEvents() + return res, nil + } +} + +func requireAttribute(t *testing.T, evts []abci.Event, name string, val string) { + for _, evt := range evts { + for _, att := range evt.Attributes { + if string(att.Key) == name { + require.Equal(t, val, string(att.Value)) + return + } + } + } + require.Fail(t, fmt.Sprintf("attribute %s not found via value %s", name, val)) +} + +func TestDeliverTxBatch(t *testing.T) { + // test increments in the ante + //anteKey := []byte("ante-key") + anteOpt := func(bapp *BaseApp) {} + + // test increments in the handler + routerOpt := func(bapp *BaseApp) { + r := sdk.NewRoute(routeMsgCounter, handlerKVStore(capKey1)) + bapp.Router().AddRoute(r) + } + + app := setupBaseApp(t, anteOpt, routerOpt) + app.InitChain(context.Background(), &abci.RequestInitChain{}) + + // Create same codec used in txDecoder + codec := codec.NewLegacyAmino() + registerTestCodec(codec) + + nBlocks := 3 + txPerHeight := 5 + + for blockN := 0; blockN < nBlocks; blockN++ { + header := tmproto.Header{Height: int64(blockN) + 1} + app.setDeliverState(header) + app.deliverState.ctx = app.deliverState.ctx.WithBlockGasMeter(sdk.NewInfiniteGasMeter()) + app.BeginBlock(app.deliverState.ctx, abci.RequestBeginBlock{Header: header}) + + var requests []*sdk.DeliverTxEntry + for i := 0; i < txPerHeight; i++ { + counter := int64(blockN*txPerHeight + i) + tx := newTxCounter(counter, counter) + + txBytes, err := codec.Marshal(tx) + require.NoError(t, err) + requests = append(requests, &sdk.DeliverTxEntry{ + Request: abci.RequestDeliverTx{Tx: txBytes}, + }) + } + + responses := app.DeliverTxBatch(app.deliverState.ctx, sdk.DeliverTxBatchRequest{TxEntries: requests}) + require.Len(t, responses.Results, txPerHeight) + + for idx, deliverTxRes := range responses.Results { + res := deliverTxRes.Response + require.Equal(t, abci.CodeTypeOK, res.Code) + requireAttribute(t, res.Events, "tx-id", fmt.Sprintf("%d", idx)) + requireAttribute(t, res.Events, "tx-val", fmt.Sprintf("%d", blockN+1)) + requireAttribute(t, res.Events, "shared-val", fmt.Sprintf("%d", blockN*txPerHeight+idx+1)) + } + + app.EndBlock(app.deliverState.ctx, abci.RequestEndBlock{}) + require.Empty(t, app.deliverState.ctx.MultiStore().GetEvents()) + app.SetDeliverStateToCommit() + app.Commit(context.Background()) + } +} diff 
--git a/server/mock/store.go b/server/mock/store.go index a4ebbcb37..bdbc8a4d6 100644 --- a/server/mock/store.go +++ b/server/mock/store.go @@ -229,3 +229,11 @@ func (kv kvStore) ReverseSubspaceIterator(prefix []byte) sdk.Iterator { func NewCommitMultiStore() sdk.CommitMultiStore { return multiStore{kv: make(map[sdk.StoreKey]kvStore)} } + +func (ms multiStore) SetKVStores(handler func(key store.StoreKey, s sdk.KVStore) store.CacheWrap) store.MultiStore { + panic("not implemented") +} + +func (ms multiStore) StoreKeys() []sdk.StoreKey { + panic("not implemented") +} diff --git a/store/cachemulti/store.go b/store/cachemulti/store.go index 43e00c32b..96ce20dfc 100644 --- a/store/cachemulti/store.go +++ b/store/cachemulti/store.go @@ -208,3 +208,20 @@ func (cms Store) GetKVStore(key types.StoreKey) types.KVStore { func (cms Store) GetWorkingHash() ([]byte, error) { panic("should never attempt to get working hash from cache multi store") } + +// StoreKeys returns a list of all store keys +func (cms Store) StoreKeys() []types.StoreKey { + keys := make([]types.StoreKey, 0, len(cms.stores)) + for _, key := range cms.keys { + keys = append(keys, key) + } + return keys +} + +// SetKVStores sets the underlying KVStores via a handler for each key +func (cms Store) SetKVStores(handler func(sk types.StoreKey, s types.KVStore) types.CacheWrap) types.MultiStore { + for k, s := range cms.stores { + cms.stores[k] = handler(k, s.(types.KVStore)) + } + return cms +} diff --git a/store/multiversion/mvkv.go b/store/multiversion/mvkv.go index 1b2f947c1..6eeabd517 100644 --- a/store/multiversion/mvkv.go +++ b/store/multiversion/mvkv.go @@ -6,6 +6,8 @@ import ( "sync" "time" + abci "github.com/tendermint/tendermint/abci/types" + "github.com/cosmos/cosmos-sdk/store/types" "github.com/cosmos/cosmos-sdk/telemetry" scheduler "github.com/cosmos/cosmos-sdk/types/occ" @@ -356,6 +358,21 @@ func (store *VersionIndexedStore) UpdateReadSet(key []byte, value []byte) { store.dirtySet[keyStr] = struct{}{} } +// Write implements types.CacheWrap so this store can exist on the cache multi store +func (store *VersionIndexedStore) Write() { + panic("not implemented") +} + +// GetEvents implements types.CacheWrap so this store can exist on the cache multi store +func (store *VersionIndexedStore) GetEvents() []abci.Event { + panic("not implemented") +} + +// ResetEvents implements types.CacheWrap so this store can exist on the cache multi store +func (store *VersionIndexedStore) ResetEvents() { + panic("not implemented") +} + func (store *VersionIndexedStore) UpdateIterateSet(iterationTracker iterationTracker) { // append to iterateset store.iterateset = append(store.iterateset, iterationTracker) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 0d16f12d6..7c14c6415 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -8,6 +8,7 @@ import ( "github.com/cosmos/cosmos-sdk/store/types" "github.com/cosmos/cosmos-sdk/telemetry" + "github.com/cosmos/cosmos-sdk/types/occ" occtypes "github.com/cosmos/cosmos-sdk/types/occ" db "github.com/tendermint/tm-db" ) @@ -24,6 +25,7 @@ type MultiVersionStore interface { CollectIteratorItems(index int) *db.MemDB SetReadset(index int, readset ReadSet) GetReadset(index int) ReadSet + VersionedIndexedStore(index int, incarnation int, abortChannel chan occ.Abort) *VersionIndexedStore SetIterateset(index int, iterateset Iterateset) GetIterateset(index int) Iterateset ValidateTransactionState(index int) (bool, []int) @@ -58,6 +60,11 @@ func 
NewMultiVersionStore(parentStore types.KVStore) *Store { } } +// VersionedIndexedStore creates a new versioned index store for a given incarnation and transaction index +func (s *Store) VersionedIndexedStore(index int, incarnation int, abortChannel chan occ.Abort) *VersionIndexedStore { + return NewVersionIndexedStore(s.parentStore, s, index, incarnation, abortChannel) +} + // GetLatest implements MultiVersionStore. func (s *Store) GetLatest(key []byte) (value MultiVersionValueItem) { s.mtx.RLock() diff --git a/store/rootmulti/store.go b/store/rootmulti/store.go index e14280b46..c7578bdca 100644 --- a/store/rootmulti/store.go +++ b/store/rootmulti/store.go @@ -1197,3 +1197,15 @@ func flushPruningHeights(batch dbm.Batch, pruneHeights []int64) { batch.Set([]byte(pruneHeightsKey), bz) } + +func (rs *Store) SetKVStores(handler func(key types.StoreKey, s types.KVStore) types.CacheWrap) types.MultiStore { + panic("SetKVStores is not implemented for rootmulti") +} + +func (rs *Store) StoreKeys() []types.StoreKey { + res := make([]types.StoreKey, len(rs.keysByName)) + for _, sk := range rs.keysByName { + res = append(res, sk) + } + return res +} diff --git a/store/types/store.go b/store/types/store.go index b34068e9a..5ecb5e166 100644 --- a/store/types/store.go +++ b/store/types/store.go @@ -145,6 +145,12 @@ type MultiStore interface { // Resets the tracked event list ResetEvents() + + // SetKVStores is a generalized wrapper method + SetKVStores(handler func(key StoreKey, s KVStore) CacheWrap) MultiStore + + // StoreKeys returns a list of store keys + StoreKeys() []StoreKey } // From MultiStore.CacheMultiStore().... diff --git a/tasks/scheduler.go b/tasks/scheduler.go index c8b063fe2..575fc1547 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -1,9 +1,15 @@ package tasks import ( - sdk "github.com/cosmos/cosmos-sdk/types" + "sort" + "github.com/tendermint/tendermint/abci/types" "golang.org/x/sync/errgroup" + + "github.com/cosmos/cosmos-sdk/store/multiversion" + store "github.com/cosmos/cosmos-sdk/store/types" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/cosmos/cosmos-sdk/types/occ" ) type status string @@ -21,14 +27,32 @@ const ( // statusValidated means the task has been validated // tasks in this status can be reset if an earlier task fails validation statusValidated status = "validated" + // statusWaiting tasks are waiting for another tx to complete + statusWaiting status = "waiting" ) type deliverTxTask struct { - Status status - Index int - Incarnation int - Request types.RequestDeliverTx - Response *types.ResponseDeliverTx + Ctx sdk.Context + AbortCh chan occ.Abort + + Status status + Dependencies []int + Abort *occ.Abort + Index int + Incarnation int + Request types.RequestDeliverTx + Response *types.ResponseDeliverTx + VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore +} + +func (dt *deliverTxTask) Increment() { + dt.Incarnation++ + dt.Status = statusPending + dt.Response = nil + dt.Abort = nil + dt.AbortCh = nil + dt.Dependencies = nil + dt.VersionStores = nil } // Scheduler processes tasks concurrently @@ -37,8 +61,9 @@ type Scheduler interface { } type scheduler struct { - deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) - workers int + deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) + workers int + multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore } // NewScheduler creates a new scheduler @@ -49,6 +74,31 @@ func NewScheduler(workers int, deliverTxFunc 
func(ctx sdk.Context, req types.Req } } +func (s *scheduler) invalidateTask(task *deliverTxTask) { + for _, mv := range s.multiVersionStores { + mv.InvalidateWriteset(task.Index, task.Incarnation) + } +} + +func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { + var conflicts []int + uniq := make(map[int]struct{}) + valid := true + for _, mv := range s.multiVersionStores { + ok, mvConflicts := mv.ValidateTransactionState(task.Index) + for _, c := range mvConflicts { + if _, ok := uniq[c]; !ok { + conflicts = append(conflicts, c) + uniq[c] = struct{}{} + } + } + // any non-ok value makes valid false + valid = ok && valid + } + sort.Ints(conflicts) + return valid, conflicts +} + func toTasks(reqs []types.RequestDeliverTx) []*deliverTxTask { res := make([]*deliverTxTask, 0, len(reqs)) for idx, r := range reqs { @@ -69,36 +119,65 @@ func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { return res } +func (s *scheduler) initMultiVersionStore(ctx sdk.Context) { + mvs := make(map[sdk.StoreKey]multiversion.MultiVersionStore) + keys := ctx.MultiStore().StoreKeys() + for _, sk := range keys { + mvs[sk] = multiversion.NewMultiVersionStore(ctx.MultiStore().GetKVStore(sk)) + } + s.multiVersionStores = mvs +} + +func indexesValidated(tasks []*deliverTxTask, idx []int) bool { + for _, i := range idx { + if tasks[i].Status != statusValidated { + return false + } + } + return true +} + +func allValidated(tasks []*deliverTxTask) bool { + for _, t := range tasks { + if t.Status != statusValidated { + return false + } + } + return true +} + func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []types.RequestDeliverTx) ([]types.ResponseDeliverTx, error) { + s.initMultiVersionStore(ctx) tasks := toTasks(reqs) toExecute := tasks - for len(toExecute) > 0 { + for !allValidated(tasks) { + var err error // execute sets statuses of tasks to either executed or aborted - err := s.executeAll(ctx, toExecute) - if err != nil { - return nil, err + if len(toExecute) > 0 { + err = s.executeAll(ctx, toExecute) + if err != nil { + return nil, err + } } // validate returns any that should be re-executed // note this processes ALL tasks, not just those recently executed - toExecute, err = s.validateAll(ctx, tasks) + toExecute, err = s.validateAll(tasks) if err != nil { return nil, err } for _, t := range toExecute { - t.Incarnation++ - t.Status = statusPending - t.Response = nil - //TODO: reset anything that needs resetting + t.Increment() } } + for _, mv := range s.multiVersionStores { + mv.WriteLatestToStore() + } return collectResponses(tasks), nil } -// TODO: validate each tasks -// TODO: return list of tasks that are invalid -func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { +func (s *scheduler) validateAll(tasks []*deliverTxTask) ([]*deliverTxTask, error) { var res []*deliverTxTask // find first non-validated entry @@ -111,13 +190,33 @@ func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*del } for i := startIdx; i < len(tasks); i++ { - // any aborted tx is known to be suspect here - if tasks[i].Status == statusAborted { + switch tasks[i].Status { + case statusAborted: + // aborted means it can be re-run immediately res = append(res, tasks[i]) - } else { - //TODO: validate the tasks and add it if invalid - //TODO: create and handle abort for validation - tasks[i].Status = statusValidated + + // validated tasks can become unvalidated if an earlier re-run task now conflicts + case statusExecuted, statusValidated: + 
if valid, conflicts := s.findConflicts(tasks[i]); !valid { + s.invalidateTask(tasks[i]) + + // if the conflicts are now validated, then rerun this task + if indexesValidated(tasks, conflicts) { + res = append(res, tasks[i]) + } else { + // otherwise, wait for completion + tasks[i].Dependencies = conflicts + tasks[i].Status = statusWaiting + } + } else if len(conflicts) == 0 { + tasks[i].Status = statusValidated + } + + case statusWaiting: + // if conflicts are done, then this task is ready to run again + if indexesValidated(tasks, tasks[i].Dependencies) { + res = append(res, tasks[i]) + } } } return res, nil @@ -125,7 +224,6 @@ func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*del // ExecuteAll executes all tasks concurrently // Tasks are updated with their status -// TODO: retries on aborted tasks // TODO: error scenarios func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { ch := make(chan *deliverTxTask, len(tasks)) @@ -147,19 +245,21 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { if !ok { return nil } - //TODO: ensure version multi store is on context - // buffered so it doesn't block on write - // abortCh := make(chan occ.Abort, 1) - //TODO: consume from abort in non-blocking way (give it a length) - resp := s.deliverTx(ctx, task.Request) + resp := s.deliverTx(task.Ctx, task.Request) + + close(task.AbortCh) - // close(abortCh) + if abt, ok := <-task.AbortCh; ok { + task.Status = statusAborted + task.Abort = &abt + continue + } - //if _, ok := <-abortCh; ok { - // tasks.status = TaskStatusAborted - // continue - //} + // write from version store to multiversion stores + for _, v := range task.VersionStores { + v.WriteToMultiVersionStore() + } task.Status = statusExecuted task.Response = &resp @@ -170,6 +270,30 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { grp.Go(func() error { defer close(ch) for _, task := range tasks { + // initialize the context + ctx = ctx.WithTxIndex(task.Index) + + // non-blocking + cms := ctx.MultiStore().CacheMultiStore() + abortCh := make(chan occ.Abort, len(s.multiVersionStores)) + + // init version stores by store key + vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) + for storeKey, mvs := range s.multiVersionStores { + vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) + } + + // save off version store so we can ask it things later + task.VersionStores = vs + ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { + return vs[k] + }) + + ctx = ctx.WithMultiStore(ms) + + task.AbortCh = abortCh + task.Ctx = ctx + select { case <-gCtx.Done(): return gCtx.Err() diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index ba9d97846..f132356ec 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -2,57 +2,103 @@ package tasks import ( "context" - sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/stretchr/testify/assert" - "github.com/tendermint/tendermint/abci/types" + "errors" + "fmt" "testing" + + "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/abci/types" + dbm "github.com/tendermint/tm-db" + + "github.com/cosmos/cosmos-sdk/store/cachekv" + "github.com/cosmos/cosmos-sdk/store/cachemulti" + "github.com/cosmos/cosmos-sdk/store/dbadapter" + sdk "github.com/cosmos/cosmos-sdk/types" ) type mockDeliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx -func (f mockDeliverTxFunc) 
DeliverTx(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { - return f(ctx, req) -} +var testStoreKey = sdk.NewKVStoreKey("mock") +var itemKey = []byte("key") func requestList(n int) []types.RequestDeliverTx { tasks := make([]types.RequestDeliverTx, n) for i := 0; i < n; i++ { - tasks[i] = types.RequestDeliverTx{} + tasks[i] = types.RequestDeliverTx{ + Tx: []byte(fmt.Sprintf("%d", i)), + } } return tasks } +func initTestCtx() sdk.Context { + ctx := sdk.Context{}.WithContext(context.Background()) + db := dbm.NewMemDB() + mem := dbadapter.Store{DB: db} + stores := make(map[sdk.StoreKey]sdk.CacheWrapper) + stores[testStoreKey] = cachekv.NewStore(mem, testStoreKey, 1000) + keys := make(map[string]sdk.StoreKey) + keys[testStoreKey.Name()] = testStoreKey + store := cachemulti.NewStore(db, stores, keys, nil, nil, nil) + ctx = ctx.WithMultiStore(&store) + return ctx +} + func TestProcessAll(t *testing.T) { tests := []struct { name string workers int + runs int requests []types.RequestDeliverTx deliverTxFunc mockDeliverTxFunc expectedErr error }{ { - name: "All tasks processed without aborts", - workers: 2, - requests: requestList(5), + name: "Test for conflicts", + workers: 50, + runs: 25, + requests: requestList(50), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { - return types.ResponseDeliverTx{} + // all txs read and write to the same key to maximize conflicts + kv := ctx.MultiStore().GetKVStore(testStoreKey) + val := string(kv.Get(itemKey)) + + // write to the store with this tx's index + kv.Set(itemKey, req.Tx) + + // return what was read from the store (final attempt should be index-1) + return types.ResponseDeliverTx{ + Info: val, + } }, expectedErr: nil, }, - //TODO: Add more test cases } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := NewScheduler(tt.workers, tt.deliverTxFunc.DeliverTx) - ctx := sdk.Context{}.WithContext(context.Background()) - - res, err := s.ProcessAll(ctx, tt.requests) - if err != tt.expectedErr { - t.Errorf("Expected error %v, got %v", tt.expectedErr, err) - } else { - // response for each request exists - assert.Len(t, res, len(tt.requests)) + for i := 0; i < tt.runs; i++ { + s := NewScheduler(tt.workers, tt.deliverTxFunc) + ctx := initTestCtx() + + res, err := s.ProcessAll(ctx, tt.requests) + if !errors.Is(err, tt.expectedErr) { + t.Errorf("Expected error %v, got %v", tt.expectedErr, err) + } else { + require.Len(t, res, len(tt.requests)) + for idx, response := range res { + if idx == 0 { + require.Equal(t, "", response.Info) + } else { + // the info is what was read from the kv store by the tx + // each tx writes its own index, so the info should be the index of the previous tx + require.Equal(t, fmt.Sprintf("%d", idx-1), response.Info) + } + } + // confirm last write made it to the parent store + res := ctx.MultiStore().GetKVStore(testStoreKey).Get(itemKey) + require.Equal(t, []byte(fmt.Sprintf("%d", len(tt.requests)-1)), res) + } } }) } From 096041b0118be9614c03162459526467fdfbc9c8 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Fri, 20 Oct 2023 15:12:45 -0400 Subject: [PATCH 15/65] [occ] Fix situation where no stores causes a panic (#338) ## Describe your changes and provide context Some tests from sei-chain don't inject a store, and while I'm not sure if that's a valid scenario I made the scheduler.go tolerant to the situation to avoid introducing this assumption to the system. 
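
For readers following the series, here is a minimal sketch of the guard this change adds to the per-task setup. The `wrapTaskContext` helper name is only for illustration (in the diff below the logic lives inline in `executeAll`); the types and fields are the ones introduced by the scheduler code earlier in this series.

```go
package tasks

import (
	"github.com/cosmos/cosmos-sdk/store/multiversion"
	store "github.com/cosmos/cosmos-sdk/store/types"
	sdk "github.com/cosmos/cosmos-sdk/types"
	"github.com/cosmos/cosmos-sdk/types/occ"
)

// wrapTaskContext (hypothetical helper) mirrors the per-task setup: the
// context's multistore is only wrapped with version-indexed stores when the
// scheduler actually has multiversion stores, so a context without injected
// stores no longer panics.
func (s *scheduler) wrapTaskContext(ctx sdk.Context, task *deliverTxTask) {
	ctx = ctx.WithTxIndex(task.Index)
	abortCh := make(chan occ.Abort, len(s.multiVersionStores))

	// if there are no stores, don't try to wrap, because there's nothing to wrap
	if len(s.multiVersionStores) > 0 {
		cms := ctx.MultiStore().CacheMultiStore()

		// init version stores by store key
		vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore)
		for storeKey, mvs := range s.multiVersionStores {
			vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh)
		}

		// save off version stores so they can be queried later
		task.VersionStores = vs
		ctx = ctx.WithMultiStore(cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap {
			return vs[k]
		}))
	}

	task.AbortCh = abortCh
	task.Ctx = ctx
}
```

Note that the abort channel is still created when the store map is empty (its capacity is simply zero), so the close/receive handling later in the execution path stays identical in both cases.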
## Testing performed to validate your change New unit test confirming lack of crash --- baseapp/deliver_tx_batch_test.go | 40 +++++++++++------ tasks/scheduler.go | 31 +++++++------ tasks/scheduler_test.go | 77 +++++++++++++++++++++----------- 3 files changed, 96 insertions(+), 52 deletions(-) diff --git a/baseapp/deliver_tx_batch_test.go b/baseapp/deliver_tx_batch_test.go index 13cd9fd60..c8a29b8b7 100644 --- a/baseapp/deliver_tx_batch_test.go +++ b/baseapp/deliver_tx_batch_test.go @@ -3,7 +3,6 @@ package baseapp import ( "context" "fmt" - "strconv" "testing" "github.com/stretchr/testify/require" @@ -12,15 +11,27 @@ import ( "github.com/cosmos/cosmos-sdk/codec" sdk "github.com/cosmos/cosmos-sdk/types" + sdkerrors "github.com/cosmos/cosmos-sdk/types/errors" ) -func toInt(b []byte) int { - r, _ := strconv.Atoi(string(b)) - return r -} +func anteHandler(capKey sdk.StoreKey, storeKey []byte) sdk.AnteHandler { + return func(ctx sdk.Context, tx sdk.Tx, simulate bool) (sdk.Context, error) { + store := ctx.KVStore(capKey) + txTest := tx.(txTest) + + if txTest.FailOnAnte { + return ctx, sdkerrors.Wrap(sdkerrors.ErrUnauthorized, "ante handler failure") + } + + val := getIntFromStore(store, storeKey) + setIntOnStore(store, storeKey, val+1) -func toByteArr(i int) []byte { - return []byte(fmt.Sprintf("%d", i)) + ctx.EventManager().EmitEvents( + counterEvent("ante-val", val+1), + ) + + return ctx, nil + } } func handlerKVStore(capKey sdk.StoreKey) sdk.Handler { @@ -40,12 +51,12 @@ func handlerKVStore(capKey sdk.StoreKey) sdk.Handler { store := ctx.KVStore(capKey) // increment per-tx key (no conflict) - val := toInt(store.Get(txKey)) - store.Set(txKey, toByteArr(val+1)) + val := getIntFromStore(store, txKey) + setIntOnStore(store, txKey, val+1) // increment shared key - sharedVal := toInt(store.Get(sharedKey)) - store.Set(sharedKey, toByteArr(sharedVal+1)) + sharedVal := getIntFromStore(store, sharedKey) + setIntOnStore(store, sharedKey, sharedVal+1) // Emit an event with the incremented value and the unique ID ctx.EventManager().EmitEvent( @@ -75,8 +86,11 @@ func requireAttribute(t *testing.T, evts []abci.Event, name string, val string) func TestDeliverTxBatch(t *testing.T) { // test increments in the ante - //anteKey := []byte("ante-key") - anteOpt := func(bapp *BaseApp) {} + anteKey := []byte("ante-key") + + anteOpt := func(bapp *BaseApp) { + bapp.SetAnteHandler(anteHandler(capKey1, anteKey)) + } // test increments in the handler routerOpt := func(bapp *BaseApp) { diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 575fc1547..12179295e 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -272,24 +272,27 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { for _, task := range tasks { // initialize the context ctx = ctx.WithTxIndex(task.Index) - - // non-blocking - cms := ctx.MultiStore().CacheMultiStore() abortCh := make(chan occ.Abort, len(s.multiVersionStores)) - // init version stores by store key - vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) - for storeKey, mvs := range s.multiVersionStores { - vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) - } + // if there are no stores, don't try to wrap, because there's nothing to wrap + if len(s.multiVersionStores) > 0 { + // non-blocking + cms := ctx.MultiStore().CacheMultiStore() - // save off version store so we can ask it things later - task.VersionStores = vs - ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { - return vs[k] - 
}) + // init version stores by store key + vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) + for storeKey, mvs := range s.multiVersionStores { + vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) + } - ctx = ctx.WithMultiStore(ms) + // save off version store so we can ask it things later + task.VersionStores = vs + ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { + return vs[k] + }) + + ctx = ctx.WithMultiStore(ms) + } task.AbortCh = abortCh task.Ctx = ctx diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index f132356ec..a2c861f44 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "github.com/cosmos/cosmos-sdk/store/cachemulti" "testing" "github.com/stretchr/testify/require" @@ -11,7 +12,6 @@ import ( dbm "github.com/tendermint/tm-db" "github.com/cosmos/cosmos-sdk/store/cachekv" - "github.com/cosmos/cosmos-sdk/store/cachemulti" "github.com/cosmos/cosmos-sdk/store/dbadapter" sdk "github.com/cosmos/cosmos-sdk/types" ) @@ -31,14 +31,16 @@ func requestList(n int) []types.RequestDeliverTx { return tasks } -func initTestCtx() sdk.Context { +func initTestCtx(injectStores bool) sdk.Context { ctx := sdk.Context{}.WithContext(context.Background()) - db := dbm.NewMemDB() - mem := dbadapter.Store{DB: db} - stores := make(map[sdk.StoreKey]sdk.CacheWrapper) - stores[testStoreKey] = cachekv.NewStore(mem, testStoreKey, 1000) keys := make(map[string]sdk.StoreKey) - keys[testStoreKey.Name()] = testStoreKey + stores := make(map[sdk.StoreKey]sdk.CacheWrapper) + db := dbm.NewMemDB() + if injectStores { + mem := dbadapter.Store{DB: db} + stores[testStoreKey] = cachekv.NewStore(mem, testStoreKey, 1000) + keys[testStoreKey.Name()] = testStoreKey + } store := cachemulti.NewStore(db, stores, keys, nil, nil, nil) ctx = ctx.WithMultiStore(&store) return ctx @@ -51,13 +53,16 @@ func TestProcessAll(t *testing.T) { runs int requests []types.RequestDeliverTx deliverTxFunc mockDeliverTxFunc + addStores bool expectedErr error + assertions func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) }{ { - name: "Test for conflicts", - workers: 50, - runs: 25, - requests: requestList(50), + name: "Test every tx accesses same key", + workers: 50, + runs: 25, + addStores: true, + requests: requestList(50), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -71,6 +76,38 @@ func TestProcessAll(t *testing.T) { Info: val, } }, + assertions: func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) { + for idx, response := range res { + if idx == 0 { + require.Equal(t, "", response.Info) + } else { + // the info is what was read from the kv store by the tx + // each tx writes its own index, so the info should be the index of the previous tx + require.Equal(t, fmt.Sprintf("%d", idx-1), response.Info) + } + } + // confirm last write made it to the parent store + latest := ctx.MultiStore().GetKVStore(testStoreKey).Get(itemKey) + require.Equal(t, []byte("49"), latest) + }, + expectedErr: nil, + }, + { + name: "Test no stores on context should not panic", + workers: 50, + runs: 1, + addStores: false, + requests: requestList(50), + deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + return types.ResponseDeliverTx{ + Info: fmt.Sprintf("%d", ctx.TxIndex()), + } + }, + 
assertions: func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) { + for idx, response := range res { + require.Equal(t, fmt.Sprintf("%d", idx), response.Info) + } + }, expectedErr: nil, }, } @@ -79,25 +116,15 @@ func TestProcessAll(t *testing.T) { t.Run(tt.name, func(t *testing.T) { for i := 0; i < tt.runs; i++ { s := NewScheduler(tt.workers, tt.deliverTxFunc) - ctx := initTestCtx() + ctx := initTestCtx(tt.addStores) res, err := s.ProcessAll(ctx, tt.requests) + require.Len(t, res, len(tt.requests)) + if !errors.Is(err, tt.expectedErr) { t.Errorf("Expected error %v, got %v", tt.expectedErr, err) } else { - require.Len(t, res, len(tt.requests)) - for idx, response := range res { - if idx == 0 { - require.Equal(t, "", response.Info) - } else { - // the info is what was read from the kv store by the tx - // each tx writes its own index, so the info should be the index of the previous tx - require.Equal(t, fmt.Sprintf("%d", idx-1), response.Info) - } - } - // confirm last write made it to the parent store - res := ctx.MultiStore().GetKVStore(testStoreKey).Get(itemKey) - require.Equal(t, []byte(fmt.Sprintf("%d", len(tt.requests)-1)), res) + tt.assertions(t, ctx, res) } } }) From 0b9193ce54393443d11e64044c3c9abe6c13f282 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 23 Oct 2023 11:40:06 -0400 Subject: [PATCH 16/65] Add occ flag check to context (#340) ## Describe your changes and provide context - Allows sei-chain to ask isOCCEnabled() so that it can choose to use the OCC logic - Sei-chain can set this to true according to desired logic ## Testing performed to validate your change - unit test that sets flag and verifies value --- types/context.go | 11 +++++++++++ types/context_test.go | 6 +++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/types/context.go b/types/context.go index ef847d3a3..e36e88dc8 100644 --- a/types/context.go +++ b/types/context.go @@ -34,6 +34,7 @@ type Context struct { voteInfo []abci.VoteInfo gasMeter GasMeter blockGasMeter GasMeter + occEnabled bool checkTx bool recheckTx bool // if recheckTx == true, then checkTx must also be true minGasPrice DecCoins @@ -104,6 +105,10 @@ func (c Context) IsReCheckTx() bool { return c.recheckTx } +func (c Context) IsOCCEnabled() bool { + return c.occEnabled +} + func (c Context) MinGasPrices() DecCoins { return c.minGasPrice } @@ -281,6 +286,12 @@ func (c Context) WithIsCheckTx(isCheckTx bool) Context { return c } +// WithIsOCCEnabled enables or disables whether OCC is used as the concurrency algorithm +func (c Context) WithIsOCCEnabled(isOCCEnabled bool) Context { + c.occEnabled = isOCCEnabled + return c +} + // WithIsRecheckTx called with true will also set true on checkTx in order to // enforce the invariant that if recheckTx = true then checkTx = true as well. func (c Context) WithIsReCheckTx(isRecheckTx bool) Context { diff --git a/types/context_test.go b/types/context_test.go index 92f5dccaf..e49a82903 100644 --- a/types/context_test.go +++ b/types/context_test.go @@ -87,6 +87,7 @@ func (s *contextTestSuite) TestContextWithCustom() { height := int64(1) chainid := "chainid" ischeck := true + isOCC := true txbytes := []byte("txbytes") logger := mocks.NewMockLogger(ctrl) voteinfos := []abci.VoteInfo{{}} @@ -106,10 +107,13 @@ func (s *contextTestSuite) TestContextWithCustom() { WithGasMeter(meter). WithMinGasPrices(minGasPrices). WithBlockGasMeter(blockGasMeter). - WithHeaderHash(headerHash) + WithHeaderHash(headerHash). 
+ WithIsOCCEnabled(isOCC) + s.Require().Equal(height, ctx.BlockHeight()) s.Require().Equal(chainid, ctx.ChainID()) s.Require().Equal(ischeck, ctx.IsCheckTx()) + s.Require().Equal(isOCC, ctx.IsOCCEnabled()) s.Require().Equal(txbytes, ctx.TxBytes()) s.Require().Equal(logger, ctx.Logger()) s.Require().Equal(voteinfos, ctx.VoteInfos()) From 27484e42caff62c1049fe89e05d1a213f1d8011d Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 24 Oct 2023 10:52:14 -0500 Subject: [PATCH 17/65] [occ] Add struct field and helpers for estimate prefills (#341) ## Describe your changes and provide context This adds in the ability to prefill estimates based on metadata passed along with deliverTxBatch ## Testing performed to validate your change Unit Test to verify that multiversion store initialization is now idempotent, and works properly regardless of whether estimate prefill is enabled --- baseapp/abci.go | 15 +++++++-------- tasks/scheduler.go | 30 ++++++++++++++++++++++++------ tasks/scheduler_test.go | 16 ++++++++++------ types/tx_batch.go | 11 +++++++++-- 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/baseapp/abci.go b/baseapp/abci.go index 52661b30a..fed748722 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -6,13 +6,14 @@ import ( "encoding/json" "errors" "fmt" - "github.com/cosmos/cosmos-sdk/tasks" "os" "sort" "strings" "syscall" "time" + "github.com/cosmos/cosmos-sdk/tasks" + "github.com/armon/go-metrics" "github.com/gogo/protobuf/proto" abci "github.com/tendermint/tendermint/abci/types" @@ -237,15 +238,13 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc // DeliverTxBatch executes multiple txs func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { - reqList := make([]abci.RequestDeliverTx, 0, len(req.TxEntries)) - for _, tx := range req.TxEntries { - reqList = append(reqList, tx.Request) - } - scheduler := tasks.NewScheduler(app.concurrencyWorkers, app.DeliverTx) - txRes, err := scheduler.ProcessAll(ctx, reqList) + // This will basically no-op the actual prefill if the metadata for the txs is empty + + // process all txs, this will also initializes the MVS if prefill estimates was disabled + txRes, err := scheduler.ProcessAll(ctx, req.TxEntries) if err != nil { - //TODO: handle error + // TODO: handle error } responses := make([]*sdk.DeliverTxResult, 0, len(req.TxEntries)) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 12179295e..095deb545 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -57,7 +57,7 @@ func (dt *deliverTxTask) Increment() { // Scheduler processes tasks concurrently type Scheduler interface { - ProcessAll(ctx sdk.Context, reqs []types.RequestDeliverTx) ([]types.ResponseDeliverTx, error) + ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } type scheduler struct { @@ -99,11 +99,11 @@ func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { return valid, conflicts } -func toTasks(reqs []types.RequestDeliverTx) []*deliverTxTask { +func toTasks(reqs []*sdk.DeliverTxEntry) []*deliverTxTask { res := make([]*deliverTxTask, 0, len(reqs)) for idx, r := range reqs { res = append(res, &deliverTxTask{ - Request: r, + Request: r.Request, Index: idx, Status: statusPending, }) @@ -119,7 +119,10 @@ func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { return res } -func (s *scheduler) initMultiVersionStore(ctx sdk.Context) { +func (s *scheduler) tryInitMultiVersionStore(ctx 
sdk.Context) { + if s.multiVersionStores != nil { + return + } mvs := make(map[sdk.StoreKey]multiversion.MultiVersionStore) keys := ctx.MultiStore().StoreKeys() for _, sk := range keys { @@ -146,8 +149,23 @@ func allValidated(tasks []*deliverTxTask) bool { return true } -func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []types.RequestDeliverTx) ([]types.ResponseDeliverTx, error) { - s.initMultiVersionStore(ctx) +func (s *scheduler) PrefillEstimates(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) { + // iterate over TXs, update estimated writesets where applicable + for i, req := range reqs { + mappedWritesets := req.EstimatedWritesets + // order shouldnt matter for storeKeys because each storeKey partitioned MVS is independent + for storeKey, writeset := range mappedWritesets { + // we use `-1` to indicate a prefill incarnation + s.multiVersionStores[storeKey].SetEstimatedWriteset(i, -1, writeset) + } + } +} + +func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { + // initialize mutli-version stores if they haven't been initialized yet + s.tryInitMultiVersionStore(ctx) + // prefill estimates + s.PrefillEstimates(ctx, reqs) tasks := toTasks(reqs) toExecute := tasks for !allValidated(tasks) { diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index a2c861f44..5cf2be6ba 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -4,9 +4,10 @@ import ( "context" "errors" "fmt" - "github.com/cosmos/cosmos-sdk/store/cachemulti" "testing" + "github.com/cosmos/cosmos-sdk/store/cachemulti" + "github.com/stretchr/testify/require" "github.com/tendermint/tendermint/abci/types" dbm "github.com/tendermint/tm-db" @@ -21,12 +22,15 @@ type mockDeliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) types.R var testStoreKey = sdk.NewKVStoreKey("mock") var itemKey = []byte("key") -func requestList(n int) []types.RequestDeliverTx { - tasks := make([]types.RequestDeliverTx, n) +func requestList(n int) []*sdk.DeliverTxEntry { + tasks := make([]*sdk.DeliverTxEntry, n) for i := 0; i < n; i++ { - tasks[i] = types.RequestDeliverTx{ - Tx: []byte(fmt.Sprintf("%d", i)), + tasks[i] = &sdk.DeliverTxEntry{ + Request: types.RequestDeliverTx{ + Tx: []byte(fmt.Sprintf("%d", i)), + }, } + } return tasks } @@ -51,7 +55,7 @@ func TestProcessAll(t *testing.T) { name string workers int runs int - requests []types.RequestDeliverTx + requests []*sdk.DeliverTxEntry deliverTxFunc mockDeliverTxFunc addStores bool expectedErr error diff --git a/types/tx_batch.go b/types/tx_batch.go index a54742fae..b053aa5fa 100644 --- a/types/tx_batch.go +++ b/types/tx_batch.go @@ -1,13 +1,20 @@ package types -import abci "github.com/tendermint/tendermint/abci/types" +import ( + "github.com/cosmos/cosmos-sdk/store/multiversion" + abci "github.com/tendermint/tendermint/abci/types" +) // DeliverTxEntry represents an individual transaction's request within a batch. // This can be extended to include tx-level tracing or metadata type DeliverTxEntry struct { - Request abci.RequestDeliverTx + Request abci.RequestDeliverTx + EstimatedWritesets MappedWritesets } +// EstimatedWritesets represents an estimated writeset for a transaction mapped by storekey to the writeset estimate. +type MappedWritesets map[StoreKey]multiversion.WriteSet + // DeliverTxBatchRequest represents a request object for a batch of transactions. 
// This can be extended to include request-level tracing or metadata type DeliverTxBatchRequest struct { From 95ddc84c080dd21ab9510951fcc4ce824a63c213 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 30 Oct 2023 15:37:14 -0400 Subject: [PATCH 18/65] Fix map access panic (#343) ## Describe your changes and provide context - `CollectIteratorItems` needs to hold an RLock to avoid a concurrent access panic ## Testing performed to validate your change - Reproduced through a sei-chain-side test (concurrent instantiates) --- store/multiversion/store.go | 45 +++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 7c14c6415..2ee1c31b9 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -206,7 +206,7 @@ func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset WriteS s.txWritesetKeys[index] = writeSetKeys } -// GetWritesetKeys implements MultiVersionStore. +// GetAllWritesetKeys implements MultiVersionStore. func (s *Store) GetAllWritesetKeys() map[int][]string { s.mtx.RLock() defer s.mtx.RUnlock() @@ -243,10 +243,13 @@ func (s *Store) GetIterateset(index int) Iterateset { // CollectIteratorItems implements MultiVersionStore. It will return a memDB containing all of the keys present in the multiversion store within the iteration range prior to (exclusive of) the index. func (s *Store) CollectIteratorItems(index int) *db.MemDB { + s.mtx.RLock() + defer s.mtx.RUnlock() + sortedItems := db.NewMemDB() // get all writeset keys prior to index - keys := s.GetAllWritesetKeys() + keys := s.txWritesetKeys for i := 0; i < index; i++ { indexedWriteset, ok := keys[i] if !ok { @@ -316,21 +319,27 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { } } -// TODO: do we want to return bool + []int where bool indicates whether it was valid and then []int indicates only ones for which we need to wait due to estimates? - yes i think so? -func (s *Store) ValidateTransactionState(index int) (bool, []int) { - defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") - conflictSet := map[int]struct{}{} - valid := true +func (s *Store) checkIteratorAtIndex(index int) bool { + s.mtx.RLock() + defer s.mtx.RUnlock() - // TODO: can we parallelize for all iterators? 
- iterateset := s.GetIterateset(index) + valid := true + iterateset := s.txIterateSets[index] for _, iterationTracker := range iterateset { iteratorValid := s.validateIterator(index, iterationTracker) valid = valid && iteratorValid } + return valid +} + +func (s *Store) checkReadsetAtIndex(index int) (bool, []int) { + s.mtx.RLock() + defer s.mtx.RUnlock() + + conflictSet := make(map[int]struct{}) + readset := s.txReadSets[index] + valid := true - // validate readset - readset := s.GetReadset(index) // iterate over readset and check if the value is the same as the latest value relateive to txIndex in the multiversion store for key, value := range readset { // get the latest value from the multiversion store @@ -357,16 +366,28 @@ func (s *Store) ValidateTransactionState(index int) (bool, []int) { } } - // convert conflictset into sorted indices conflictIndices := make([]int, 0, len(conflictSet)) for index := range conflictSet { conflictIndices = append(conflictIndices, index) } sort.Ints(conflictIndices) + return valid, conflictIndices } +// TODO: do we want to return bool + []int where bool indicates whether it was valid and then []int indicates only ones for which we need to wait due to estimates? - yes i think so? +func (s *Store) ValidateTransactionState(index int) (bool, []int) { + defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") + + // TODO: can we parallelize for all iterators? + iteratorValid := s.checkIteratorAtIndex(index) + + readsetValid, conflictIndices := s.checkReadsetAtIndex(index) + + return iteratorValid && readsetValid, conflictIndices +} + func (s *Store) WriteLatestToStore() { s.mtx.Lock() defer s.mtx.Unlock() From be4a4aea71c4ed4f6087747aa5b0bb02552394ab Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Fri, 3 Nov 2023 09:37:19 -0500 Subject: [PATCH 19/65] Gen estimates writeset (#344) ## Describe your changes and provide context This adds the accesscontrol module behavior to add the tx writeset generation ## Testing performed to validate your change Unit tests + integration with sei-chain and loadtest cluster testing --- tasks/scheduler.go | 2 +- types/accesscontrol/validation.go | 1 + x/accesscontrol/keeper/keeper.go | 63 +++++++++++++++++++++++++++ x/accesscontrol/keeper/keeper_test.go | 37 ++++++++++++++++ x/accesscontrol/keeper/options.go | 8 ++++ 5 files changed, 110 insertions(+), 1 deletion(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 095deb545..a38ef7f96 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -228,7 +228,7 @@ func (s *scheduler) validateAll(tasks []*deliverTxTask) ([]*deliverTxTask, error } } else if len(conflicts) == 0 { tasks[i].Status = statusValidated - } + } // TODO: do we need to have handling for conflicts existing here? 
case statusWaiting: // if conflicts are done, then this task is ready to run again diff --git a/types/accesscontrol/validation.go b/types/accesscontrol/validation.go index 40a525a92..ec83885c6 100644 --- a/types/accesscontrol/validation.go +++ b/types/accesscontrol/validation.go @@ -10,6 +10,7 @@ var ( ) type StoreKeyToResourceTypePrefixMap map[string]map[ResourceType][]byte +type ResourceTypeToStoreKeyMap map[ResourceType]string func DefaultStoreKeyToResourceTypePrefixMap() StoreKeyToResourceTypePrefixMap { return StoreKeyToResourceTypePrefixMap{ diff --git a/x/accesscontrol/keeper/keeper.go b/x/accesscontrol/keeper/keeper.go index 1ca93f602..44189a6d6 100644 --- a/x/accesscontrol/keeper/keeper.go +++ b/x/accesscontrol/keeper/keeper.go @@ -12,6 +12,7 @@ import ( "github.com/yourbasic/graph" "github.com/cosmos/cosmos-sdk/codec" + "github.com/cosmos/cosmos-sdk/store/multiversion" "github.com/cosmos/cosmos-sdk/telemetry" sdk "github.com/cosmos/cosmos-sdk/types" acltypes "github.com/cosmos/cosmos-sdk/types/accesscontrol" @@ -40,6 +41,7 @@ type ( MessageDependencyGeneratorMapper DependencyGeneratorMap AccountKeeper authkeeper.AccountKeeper StakingKeeper stakingkeeper.Keeper + ResourceTypeStoreKeyMapping acltypes.ResourceTypeToStoreKeyMap } ) @@ -493,6 +495,67 @@ func (k Keeper) IterateWasmDependencies(ctx sdk.Context, handler func(wasmDepend } } +type storeKeyMap map[string]sdk.StoreKey + +func (k Keeper) GetStoreKeyMap(ctx sdk.Context) storeKeyMap { + storeKeyMap := make(storeKeyMap) + for _, storeKey := range ctx.MultiStore().StoreKeys() { + storeKeyMap[storeKey.Name()] = storeKey + } + return storeKeyMap +} + +func (k Keeper) UpdateWritesetsWithAccessOps(accessOps []acltypes.AccessOperation, mappedWritesets sdk.MappedWritesets, storeKeyMap storeKeyMap) sdk.MappedWritesets { + for _, accessOp := range accessOps { + // we only want writes and unknowns (assumed writes) + if accessOp.AccessType != acltypes.AccessType_WRITE && accessOp.AccessType != acltypes.AccessType_UNKNOWN { + continue + } + // the accessOps should only have SPECIFIC identifiers (we don't want wildcards) + if accessOp.IdentifierTemplate == "*" { + continue + } + // check the resource type to store key map for potential store key + if storeKeyStr, ok := k.ResourceTypeStoreKeyMapping[accessOp.ResourceType]; ok { + // check that we have a storekey corresponding to that string + if storeKey, ok2 := storeKeyMap[storeKeyStr]; ok2 { + // if we have a StoreKey, add it to the writeset - writing empty bytes is ok because it will be saved as EstimatedWriteset + if _, ok := mappedWritesets[storeKey]; !ok { + mappedWritesets[storeKey] = make(multiversion.WriteSet) + } + mappedWritesets[storeKey][accessOp.IdentifierTemplate] = []byte{} + } + } + + } + return mappedWritesets +} + +// GenerateEstimatedWritesets utilizes the existing patterns for access operation generation to estimate the writesets for a transaction +func (k Keeper) GenerateEstimatedWritesets(ctx sdk.Context, txDecoder sdk.TxDecoder, anteDepGen sdk.AnteDepGenerator, txIndex int, txBytes []byte) (sdk.MappedWritesets, error) { + storeKeyMap := k.GetStoreKeyMap(ctx) + writesets := make(sdk.MappedWritesets) + tx, err := txDecoder(txBytes) + if err != nil { + return nil, err + } + // generate antedeps accessOps for tx + anteDeps, err := anteDepGen([]acltypes.AccessOperation{}, tx, txIndex) + if err != nil { + return nil, err + } + writesets = k.UpdateWritesetsWithAccessOps(anteDeps, writesets, storeKeyMap) + + // generate accessOps for each message + msgs := tx.GetMsgs() + for 
_, msg := range msgs { + msgDependencies := k.GetMessageDependencies(ctx, msg) + // update estimated writeset for each message deps + writesets = k.UpdateWritesetsWithAccessOps(msgDependencies, writesets, storeKeyMap) + } + return writesets, nil +} + func (k Keeper) BuildDependencyDag(ctx sdk.Context, txDecoder sdk.TxDecoder, anteDepGen sdk.AnteDepGenerator, txs [][]byte) (*types.Dag, error) { defer MeasureBuildDagDuration(time.Now(), "BuildDependencyDag") // contains the latest msg index for a specific Access Operation diff --git a/x/accesscontrol/keeper/keeper_test.go b/x/accesscontrol/keeper/keeper_test.go index f08cd1ade..6e696b8bb 100644 --- a/x/accesscontrol/keeper/keeper_test.go +++ b/x/accesscontrol/keeper/keeper_test.go @@ -20,6 +20,7 @@ import ( aclkeeper "github.com/cosmos/cosmos-sdk/x/accesscontrol/keeper" acltestutil "github.com/cosmos/cosmos-sdk/x/accesscontrol/testutil" "github.com/cosmos/cosmos-sdk/x/accesscontrol/types" + authtypes "github.com/cosmos/cosmos-sdk/x/auth/types" banktypes "github.com/cosmos/cosmos-sdk/x/bank/types" govtypes "github.com/cosmos/cosmos-sdk/x/gov/types" stakingtypes "github.com/cosmos/cosmos-sdk/x/staking/types" @@ -2669,6 +2670,42 @@ func (suite *KeeperTestSuite) TestBuildSelectorOps_AccessOperationSelectorType_C req.NoError(err) } +func TestGenerateEstimatedDependencies(t *testing.T) { + app := simapp.Setup(false) + ctx := app.BaseApp.NewContext(false, tmproto.Header{}) + + accounts := simapp.AddTestAddrsIncremental(app, ctx, 2, sdk.NewInt(30000000)) + // setup test txs + msgs := []sdk.Msg{ + banktypes.NewMsgSend(accounts[0], accounts[1], sdk.NewCoins(sdk.NewCoin("usei", sdk.NewInt(1)))), + } + // set up testing mapping + app.AccessControlKeeper.ResourceTypeStoreKeyMapping = map[acltypes.ResourceType]string{ + acltypes.ResourceType_KV_BANK_BALANCES: banktypes.StoreKey, + acltypes.ResourceType_KV_AUTH_ADDRESS_STORE: authtypes.StoreKey, + } + + storeKeyMap := app.AccessControlKeeper.GetStoreKeyMap(ctx) + + txBuilder := simapp.MakeTestEncodingConfig().TxConfig.NewTxBuilder() + err := txBuilder.SetMsgs(msgs...) 
+ require.NoError(t, err) + bz, err := simapp.MakeTestEncodingConfig().TxConfig.TxEncoder()(txBuilder.GetTx()) + require.NoError(t, err) + + writesets, err := app.AccessControlKeeper.GenerateEstimatedWritesets(ctx, simapp.MakeTestEncodingConfig().TxConfig.TxDecoder(), app.GetAnteDepGenerator(), 0, bz) + require.NoError(t, err) + + // check writesets + require.Equal(t, 2, len(writesets)) + bankWritesets := writesets[storeKeyMap[banktypes.StoreKey]] + require.Equal(t, 3, len(bankWritesets)) + + authWritesets := writesets[storeKeyMap[authtypes.StoreKey]] + require.Equal(t, 1, len(authWritesets)) + +} + func TestKeeperTestSuite(t *testing.T) { t.Parallel() suite.Run(t, new(KeeperTestSuite)) diff --git a/x/accesscontrol/keeper/options.go b/x/accesscontrol/keeper/options.go index 365280ab3..6dd7f3b36 100644 --- a/x/accesscontrol/keeper/options.go +++ b/x/accesscontrol/keeper/options.go @@ -1,5 +1,7 @@ package keeper +import acltypes "github.com/cosmos/cosmos-sdk/types/accesscontrol" + type optsFn func(*Keeper) func (f optsFn) Apply(keeper *Keeper) { @@ -25,3 +27,9 @@ func (oldGenerator DependencyGeneratorMap) Merge(newGenerator DependencyGenerato } return oldGenerator } + +func WithResourceTypeToStoreKeyMap(resourceTypeStoreKeyMapping acltypes.ResourceTypeToStoreKeyMap) optsFn { + return optsFn(func(k *Keeper) { + k.ResourceTypeStoreKeyMapping = resourceTypeStoreKeyMapping + }) +} From 931e2f6a56fe71b9ad20ef5a5e97c320df67479e Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 6 Nov 2023 17:02:10 -0500 Subject: [PATCH 20/65] [OCC] Add trace spans to scheduler (#347) ## Describe your changes and provide context - Adds trace span for `SchedulerValidate` - Adds trace span for `SchedulerExecute` - Mild refactor (extracted methods) to make it easier to defer span ending ## Testing performed to validate your change Example trace (run locally) ![image](https://github.com/sei-protocol/sei-cosmos/assets/6051744/b8a032f1-71b1-4e95-b12e-357455ebcc6d) Example attributes of SchedulerExecute operation ![image](https://github.com/sei-protocol/sei-cosmos/assets/6051744/68992e84-4000-44c1-8597-9d4c10583a66) --- baseapp/abci.go | 2 +- tasks/scheduler.go | 128 +++++++++++++++++++++++++--------------- tasks/scheduler_test.go | 16 ++++- 3 files changed, 93 insertions(+), 53 deletions(-) diff --git a/baseapp/abci.go b/baseapp/abci.go index fed748722..ab4967f27 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -238,7 +238,7 @@ func (app *BaseApp) CheckTx(ctx context.Context, req *abci.RequestCheckTx) (*abc // DeliverTxBatch executes multiple txs func (app *BaseApp) DeliverTxBatch(ctx sdk.Context, req sdk.DeliverTxBatchRequest) (res sdk.DeliverTxBatchResponse) { - scheduler := tasks.NewScheduler(app.concurrencyWorkers, app.DeliverTx) + scheduler := tasks.NewScheduler(app.concurrencyWorkers, app.TracingInfo, app.DeliverTx) // This will basically no-op the actual prefill if the metadata for the txs is empty // process all txs, this will also initializes the MVS if prefill estimates was disabled diff --git a/tasks/scheduler.go b/tasks/scheduler.go index a38ef7f96..7b1afc0d2 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -1,15 +1,20 @@ package tasks import ( + "crypto/sha256" + "fmt" "sort" "github.com/tendermint/tendermint/abci/types" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "golang.org/x/sync/errgroup" "github.com/cosmos/cosmos-sdk/store/multiversion" store "github.com/cosmos/cosmos-sdk/store/types" sdk "github.com/cosmos/cosmos-sdk/types" 
"github.com/cosmos/cosmos-sdk/types/occ" + "github.com/cosmos/cosmos-sdk/utils/tracing" ) type status string @@ -33,6 +38,7 @@ const ( type deliverTxTask struct { Ctx sdk.Context + Span trace.Span AbortCh chan occ.Abort Status status @@ -64,13 +70,15 @@ type scheduler struct { deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) workers int multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore + tracingInfo *tracing.Info } // NewScheduler creates a new scheduler -func NewScheduler(workers int, deliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx)) Scheduler { +func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx)) Scheduler { return &scheduler{ - workers: workers, - deliverTx: deliverTxFunc, + workers: workers, + deliverTx: deliverTxFunc, + tracingInfo: tracingInfo, } } @@ -181,7 +189,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // validate returns any that should be re-executed // note this processes ALL tasks, not just those recently executed - toExecute, err = s.validateAll(tasks) + toExecute, err = s.validateAll(ctx, tasks) if err != nil { return nil, err } @@ -195,7 +203,11 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return collectResponses(tasks), nil } -func (s *scheduler) validateAll(tasks []*deliverTxTask) ([]*deliverTxTask, error) { +func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { + spanCtx, span := s.tracingInfo.StartWithContext("SchedulerValidate", ctx.TraceSpanContext()) + ctx = ctx.WithTraceSpanContext(spanCtx) + defer span.End() + var res []*deliverTxTask // find first non-validated entry @@ -263,24 +275,7 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { if !ok { return nil } - - resp := s.deliverTx(task.Ctx, task.Request) - - close(task.AbortCh) - - if abt, ok := <-task.AbortCh; ok { - task.Status = statusAborted - task.Abort = &abt - continue - } - - // write from version store to multiversion stores - for _, v := range task.VersionStores { - v.WriteToMultiVersionStore() - } - - task.Status = statusExecuted - task.Response = &resp + s.executeTask(task) } } }) @@ -288,32 +283,7 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { grp.Go(func() error { defer close(ch) for _, task := range tasks { - // initialize the context - ctx = ctx.WithTxIndex(task.Index) - abortCh := make(chan occ.Abort, len(s.multiVersionStores)) - - // if there are no stores, don't try to wrap, because there's nothing to wrap - if len(s.multiVersionStores) > 0 { - // non-blocking - cms := ctx.MultiStore().CacheMultiStore() - - // init version stores by store key - vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) - for storeKey, mvs := range s.multiVersionStores { - vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) - } - - // save off version store so we can ask it things later - task.VersionStores = vs - ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { - return vs[k] - }) - - ctx = ctx.WithMultiStore(ms) - } - - task.AbortCh = abortCh - task.Ctx = ctx + s.prepareTask(ctx, task) select { case <-gCtx.Done(): @@ -330,3 +300,63 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { return nil } + +// prepareTask initializes 
the context and version stores for a task +func (s *scheduler) prepareTask(ctx sdk.Context, task *deliverTxTask) { + // initialize the context + ctx = ctx.WithTxIndex(task.Index) + abortCh := make(chan occ.Abort, len(s.multiVersionStores)) + spanCtx, span := s.tracingInfo.StartWithContext("SchedulerExecute", ctx.TraceSpanContext()) + span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) + span.SetAttributes(attribute.Int("txIndex", task.Index)) + span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) + ctx = ctx.WithTraceSpanContext(spanCtx) + + // if there are no stores, don't try to wrap, because there's nothing to wrap + if len(s.multiVersionStores) > 0 { + // non-blocking + cms := ctx.MultiStore().CacheMultiStore() + + // init version stores by store key + vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) + for storeKey, mvs := range s.multiVersionStores { + vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) + } + + // save off version store so we can ask it things later + task.VersionStores = vs + ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { + return vs[k] + }) + + ctx = ctx.WithMultiStore(ms) + } + + task.AbortCh = abortCh + task.Ctx = ctx + task.Span = span +} + +// executeTask executes a single task +func (s *scheduler) executeTask(task *deliverTxTask) { + if task.Span != nil { + defer task.Span.End() + } + resp := s.deliverTx(task.Ctx, task.Request) + + close(task.AbortCh) + + if abt, ok := <-task.AbortCh; ok { + task.Status = statusAborted + task.Abort = &abt + return + } + + // write from version store to multiversion stores + for _, v := range task.VersionStores { + v.WriteToMultiVersionStore() + } + + task.Status = statusExecuted + task.Response = &resp +} diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 5cf2be6ba..accc8bf3e 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -6,15 +6,17 @@ import ( "fmt" "testing" - "github.com/cosmos/cosmos-sdk/store/cachemulti" - "github.com/stretchr/testify/require" "github.com/tendermint/tendermint/abci/types" dbm "github.com/tendermint/tm-db" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" "github.com/cosmos/cosmos-sdk/store/cachekv" + "github.com/cosmos/cosmos-sdk/store/cachemulti" "github.com/cosmos/cosmos-sdk/store/dbadapter" sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/cosmos/cosmos-sdk/utils/tracing" ) type mockDeliverTxFunc func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx @@ -119,7 +121,15 @@ func TestProcessAll(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for i := 0; i < tt.runs; i++ { - s := NewScheduler(tt.workers, tt.deliverTxFunc) + // set a tracer provider + tp := trace.NewNoopTracerProvider() + otel.SetTracerProvider(trace.NewNoopTracerProvider()) + tr := tp.Tracer("scheduler-test") + ti := &tracing.Info{ + Tracer: &tr, + } + + s := NewScheduler(tt.workers, ti, tt.deliverTxFunc) ctx := initTestCtx(tt.addStores) res, err := s.ProcessAll(ctx, tt.requests) From eac865739529a4549073e5e86a80ece402b8ce59 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Fri, 10 Nov 2023 15:43:10 -0600 Subject: [PATCH 21/65] [occ] Fix parent store readset validation (#348) ## Describe your changes and provide context This fixes the validation to remove a panic for a case that can actually occur if a transaction writes a key that is later read, and that writing transaction is reverted and then 
the readset validation reads from parent store. In this case, the readset would have a conflict based on the data available in parent store, so we shouldn't panic. This also adds in the resource types needed for the new DEX_MEM keys ## Testing performed to validate your change Tested in loadtest cluster --- proto/cosmos/accesscontrol/constants.proto | 4 + store/multiversion/store.go | 4 +- store/multiversion/store_test.go | 47 ++++- types/accesscontrol/constants.pb.go | 192 +++++++++++---------- types/accesscontrol/resource.go | 4 + 5 files changed, 154 insertions(+), 97 deletions(-) diff --git a/proto/cosmos/accesscontrol/constants.proto b/proto/cosmos/accesscontrol/constants.proto index a8820fb48..f8c977a73 100644 --- a/proto/cosmos/accesscontrol/constants.proto +++ b/proto/cosmos/accesscontrol/constants.proto @@ -130,7 +130,11 @@ enum ResourceType { KV_DEX_SHORT_ORDER_COUNT = 92; // child of KV_DEX KV_BANK_DEFERRED = 93; // child of KV + reserved 94; KV_BANK_DEFERRED_MODULE_TX_INDEX = 95; // child of KV_BANK_DEFERRED + + KV_DEX_MEM_CONTRACTS_TO_PROCESS = 96; // child of KV_DEX_MEM + KV_DEX_MEM_DOWNSTREAM_CONTRACTS = 97; // child of KV_DEX_MEM } enum WasmMessageSubtype { diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 2ee1c31b9..bc5e8ee4a 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -345,10 +345,10 @@ func (s *Store) checkReadsetAtIndex(index int) (bool, []int) { // get the latest value from the multiversion store latestValue := s.GetLatestBeforeIndex(index, []byte(key)) if latestValue == nil { - // TODO: maybe we don't even do this check? + // this is possible if we previously read a value from a transaction write that was later reverted, so this time we read from parent store parentVal := s.parentStore.Get([]byte(key)) if !bytes.Equal(parentVal, value) { - panic("there shouldn't be readset conflicts with parent kv store, since it shouldn't change") + valid = false } } else { // if estimate, mark as conflict index - but don't invalidate diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index 84e9f77ac..ae0f3afda 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -231,10 +231,51 @@ func TestMultiVersionStoreValidateState(t *testing.T) { valid, conflicts = mvs.ValidateTransactionState(5) require.False(t, valid) require.Equal(t, []int{4}, conflicts) +} + +func TestMultiVersionStoreParentValidationMismatch(t *testing.T) { + parentKVStore := dbadapter.Store{DB: dbm.NewMemDB()} + mvs := multiversion.NewMultiVersionStore(parentKVStore) + + parentKVStore.Set([]byte("key2"), []byte("value0")) + parentKVStore.Set([]byte("key3"), []byte("value3")) + parentKVStore.Set([]byte("key4"), []byte("value4")) + parentKVStore.Set([]byte("key5"), []byte("value5")) + + writeset := make(multiversion.WriteSet) + writeset["key1"] = []byte("value1") + writeset["key2"] = []byte("value2") + writeset["key3"] = nil + mvs.SetWriteset(1, 2, writeset) - // assert panic for parent store mismatch - parentKVStore.Set([]byte("key5"), []byte("value6")) - require.Panics(t, func() { mvs.ValidateTransactionState(5) }) + readset := make(multiversion.ReadSet) + readset["key1"] = []byte("value1") + readset["key2"] = []byte("value2") + readset["key3"] = nil + readset["key4"] = []byte("value4") + readset["key5"] = []byte("value5") + mvs.SetReadset(5, readset) + + // assert no readset is valid + valid, conflicts := mvs.ValidateTransactionState(4) + require.True(t, valid) + require.Empty(t, conflicts) + + 
// assert readset index 5 is valid + valid, conflicts = mvs.ValidateTransactionState(5) + require.True(t, valid) + require.Empty(t, conflicts) + + // overwrite tx writeset for tx1 - no longer writes key1 + writeset2 := make(multiversion.WriteSet) + writeset2["key2"] = []byte("value2") + writeset2["key3"] = nil + mvs.SetWriteset(1, 3, writeset2) + + // assert readset index 5 is invalid - because of mismatch with parent store + valid, conflicts = mvs.ValidateTransactionState(5) + require.False(t, valid) + require.Empty(t, conflicts) } func TestMVSValidationWithOnlyEstimate(t *testing.T) { diff --git a/types/accesscontrol/constants.pb.go b/types/accesscontrol/constants.pb.go index 1625561ed..c310dc868 100644 --- a/types/accesscontrol/constants.pb.go +++ b/types/accesscontrol/constants.pb.go @@ -195,6 +195,8 @@ const ( ResourceType_KV_DEX_SHORT_ORDER_COUNT ResourceType = 92 ResourceType_KV_BANK_DEFERRED ResourceType = 93 ResourceType_KV_BANK_DEFERRED_MODULE_TX_INDEX ResourceType = 95 + ResourceType_KV_DEX_MEM_CONTRACTS_TO_PROCESS ResourceType = 96 + ResourceType_KV_DEX_MEM_DOWNSTREAM_CONTRACTS ResourceType = 97 ) var ResourceType_name = map[int32]string{ @@ -290,6 +292,8 @@ var ResourceType_name = map[int32]string{ 92: "KV_DEX_SHORT_ORDER_COUNT", 93: "KV_BANK_DEFERRED", 95: "KV_BANK_DEFERRED_MODULE_TX_INDEX", + 96: "KV_DEX_MEM_CONTRACTS_TO_PROCESS", + 97: "KV_DEX_MEM_DOWNSTREAM_CONTRACTS", } var ResourceType_value = map[string]int32{ @@ -385,6 +389,8 @@ var ResourceType_value = map[string]int32{ "KV_DEX_SHORT_ORDER_COUNT": 92, "KV_BANK_DEFERRED": 93, "KV_BANK_DEFERRED_MODULE_TX_INDEX": 95, + "KV_DEX_MEM_CONTRACTS_TO_PROCESS": 96, + "KV_DEX_MEM_DOWNSTREAM_CONTRACTS": 97, } func (x ResourceType) String() string { @@ -432,96 +438,98 @@ func init() { } var fileDescriptor_36568f7561081112 = []byte{ - // 1445 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x56, 0x5b, 0x73, 0x13, 0xbf, - 0x15, 0xcf, 0xfd, 0xa2, 0x04, 0x38, 0x28, 0xdc, 0x13, 0x0c, 0x84, 0x14, 0x68, 0x80, 0x84, 0x4b, - 0xaf, 0xd0, 0x96, 0xca, 0xab, 0x63, 0x7b, 0xe3, 0x5d, 0x69, 0x2d, 0x69, 0x7d, 0xa1, 0xed, 0x68, - 0x12, 0xd7, 0x43, 0x99, 0x92, 0x98, 0x89, 0x4d, 0xa7, 0xfd, 0x0c, 0x7d, 0xe9, 0xc7, 0xea, 0x23, - 0x8f, 0x7d, 0x64, 0xe0, 0x8b, 0x74, 0xb4, 0x2b, 0x9b, 0xb5, 0x09, 0x7f, 0x9e, 0x12, 0x9f, 0xdf, - 0x4f, 0x67, 0x75, 0x7e, 0xe7, 0x26, 0xb2, 0xd3, 0xed, 0x0f, 0x8e, 0xfb, 0x83, 0xfd, 0xc3, 0x6e, - 0xb7, 0x37, 0x18, 0x74, 0xfb, 0x27, 0xc3, 0xd3, 0xfe, 0xbb, 0xfd, 0x6e, 0xff, 0x64, 0x30, 0x3c, - 0x3c, 0x19, 0x0e, 0xf6, 0xde, 0x9f, 0xf6, 0x87, 0x7d, 0xba, 0x95, 0xb3, 0xf6, 0x26, 0x58, 0x7b, - 0xff, 0x78, 0x7a, 0xd4, 0x1b, 0x1e, 0x3e, 0xdd, 0x7d, 0x41, 0x08, 0xcb, 0x00, 0xf3, 0xaf, 0xf7, - 0x3d, 0xba, 0x46, 0x96, 0x53, 0x51, 0x17, 0xb2, 0x25, 0x60, 0x86, 0xae, 0x90, 0x05, 0x85, 0x8c, - 0xc3, 0x2c, 0x5d, 0x25, 0x8b, 0x2d, 0x15, 0x1a, 0x84, 0x39, 0x4a, 0xc8, 0x52, 0x20, 0xe3, 0x38, - 0x34, 0x30, 0xbf, 0xfb, 0xef, 0x39, 0xb2, 0x99, 0x1f, 0x96, 0xef, 0x7b, 0xa7, 0x87, 0xc3, 0xb7, - 0xfd, 0x13, 0xdd, 0x7b, 0xd7, 0xeb, 0x0e, 0xfb, 0xa7, 0x99, 0xb7, 0x15, 0xb2, 0x20, 0xa4, 0x40, - 0x98, 0xa1, 0x4b, 0x64, 0xee, 0xa0, 0x01, 0xb3, 0xf4, 0x32, 0xb9, 0x78, 0xd0, 0xb0, 0x65, 0x0c, - 0x6a, 0xcf, 0x9f, 0x59, 0xc6, 0xb9, 0x42, 0xad, 0x61, 0x8e, 0x96, 0xc8, 0x8d, 0x83, 0x86, 0x8d, - 0x50, 0x54, 0x4d, 0xcd, 0x26, 0x0a, 0x2b, 0x61, 0x1b, 0xf9, 0x18, 0x9f, 0xa7, 0xd7, 0xc9, 0x65, - 0x8d, 0x82, 0xa3, 0x9a, 0x3e, 0xba, 0x40, 0xb7, 0x49, 0xc9, 0x43, 0xdf, 0x3b, 0xbe, 0x48, 0x2f, - 0x11, 0x08, 
0xa4, 0x30, 0x8a, 0x05, 0x66, 0x6c, 0x5d, 0xa2, 0x37, 0xc8, 0x95, 0x83, 0x86, 0x8d, - 0x51, 0x6b, 0x56, 0x45, 0x1b, 0x48, 0xc1, 0x43, 0x13, 0x4a, 0xc1, 0x22, 0x58, 0x76, 0x58, 0x20, - 0x85, 0x36, 0x4c, 0x18, 0xab, 0x8d, 0x0a, 0x45, 0xd5, 0x1a, 0x69, 0x6b, 0xd8, 0x86, 0x15, 0x7a, - 0x85, 0xd0, 0xb1, 0x37, 0x85, 0x15, 0x54, 0x28, 0x02, 0x84, 0xd5, 0xdd, 0x4f, 0x1b, 0x64, 0x5d, - 0xf5, 0x06, 0xfd, 0x0f, 0xa7, 0xdd, 0x5e, 0x16, 0xfe, 0x32, 0x99, 0x67, 0xa2, 0x93, 0x47, 0x5f, - 0x6f, 0xc2, 0xac, 0x33, 0xc4, 0xbd, 0xe3, 0x5c, 0x44, 0xde, 0xfb, 0xa7, 0xfb, 0x7f, 0xde, 0x49, - 0x5e, 0x6f, 0xda, 0x32, 0x13, 0x75, 0x58, 0xa0, 0xe7, 0x09, 0xa9, 0x37, 0xad, 0x36, 0xac, 0x1e, - 0x8a, 0x2a, 0x2c, 0x7a, 0xb0, 0xc5, 0x74, 0x0c, 0x4b, 0xf4, 0x1c, 0x59, 0xad, 0x37, 0xad, 0x54, - 0x2c, 0x88, 0x10, 0x96, 0x9d, 0x93, 0x7a, 0xd3, 0xf2, 0xec, 0x4e, 0xeb, 0x64, 0xa5, 0xde, 0xb4, - 0x98, 0xc8, 0xa0, 0x06, 0xab, 0x74, 0x83, 0x5c, 0xa8, 0x37, 0xad, 0x91, 0x75, 0x14, 0x15, 0x16, - 0x18, 0xa9, 0x3a, 0x40, 0x5c, 0x48, 0xe3, 0xd3, 0xb6, 0x29, 0x0d, 0x5a, 0xc3, 0x54, 0x15, 0x8d, - 0x86, 0x35, 0x7a, 0x93, 0x5c, 0xff, 0x8a, 0xb1, 0x6a, 0x55, 0x61, 0x95, 0x99, 0x9c, 0xa5, 0x61, - 0xdd, 0x65, 0xed, 0x2b, 0x5c, 0x41, 0xe4, 0xa8, 0x34, 0x9c, 0x73, 0x59, 0xf9, 0x7a, 0x59, 0xcb, - 0x31, 0x72, 0xa7, 0x42, 0x29, 0xe0, 0x3c, 0xbd, 0x46, 0x2e, 0x15, 0xa0, 0x26, 0x8b, 0x42, 0xce, - 0x8c, 0x54, 0x70, 0xc1, 0x47, 0xc4, 0x52, 0x53, 0x03, 0xf0, 0x1e, 0xdc, 0x8f, 0x51, 0x5e, 0xac, - 0x36, 0x52, 0x21, 0x5c, 0xa4, 0x94, 0x9c, 0xf7, 0xb2, 0x58, 0x9d, 0x26, 0x49, 0xd4, 0x01, 0x4a, - 0x2f, 0x92, 0x73, 0x23, 0x1b, 0x47, 0x21, 0x63, 0xd8, 0x70, 0xa9, 0x1d, 0x99, 0xca, 0x2c, 0x62, - 0x22, 0x40, 0x0d, 0x97, 0xbc, 0xdf, 0xa2, 0x00, 0xfe, 0xc0, 0x65, 0xba, 0x45, 0xae, 0x4d, 0x43, - 0x31, 0x1a, 0xc6, 0x99, 0x61, 0x70, 0xe5, 0xac, 0x83, 0x8c, 0xc7, 0xa1, 0x80, 0xab, 0x74, 0x93, - 0x5c, 0x9d, 0x86, 0x02, 0x85, 0x59, 0x54, 0xd7, 0x3c, 0xe8, 0x15, 0xc2, 0x76, 0x50, 0x63, 0xa2, - 0x8a, 0x56, 0x31, 0x83, 0x70, 0xdd, 0x95, 0xe8, 0x94, 0xf2, 0x09, 0x0a, 0x16, 0x99, 0x8e, 0x0d, - 0x64, 0x2a, 0x0c, 0x2a, 0xb8, 0xe1, 0xaf, 0xe5, 0x39, 0x89, 0x0a, 0x03, 0xb4, 0x5a, 0xb0, 0x44, - 0xd7, 0xa4, 0x81, 0x4d, 0x7a, 0x8b, 0x6c, 0x7e, 0x2b, 0x67, 0x28, 0x85, 0x4d, 0x64, 0x0b, 0x15, - 0x6c, 0xf9, 0xe4, 0x8e, 0x08, 0x46, 0x1a, 0x16, 0x79, 0xec, 0xa6, 0xff, 0xfc, 0x37, 0xb9, 0xd0, - 0xae, 0xe4, 0x33, 0xd9, 0xa1, 0x44, 0xef, 0x92, 0x5b, 0x05, 0x4e, 0x2a, 0xca, 0xae, 0x1b, 0x26, - 0x93, 0x7a, 0x8b, 0xde, 0x27, 0x77, 0x7f, 0x40, 0x72, 0xde, 0xe1, 0xb6, 0x57, 0x63, 0x44, 0x54, - 0x58, 0xf0, 0x72, 0x67, 0xea, 0x53, 0x45, 0xd0, 0x9d, 0xb6, 0x5a, 0x05, 0xb0, 0xfd, 0x23, 0x12, - 0xd7, 0x06, 0xee, 0xd2, 0x3b, 0xe4, 0xe6, 0xf7, 0x48, 0x8d, 0x14, 0x53, 0x84, 0x1d, 0x37, 0x58, - 0xce, 0x8a, 0xdd, 0xe3, 0x3f, 0x9b, 0xc2, 0x6b, 0xa1, 0xab, 0xbe, 0x30, 0x60, 0x91, 0x0d, 0x45, - 0x45, 0xc2, 0xbd, 0xa9, 0x3a, 0x1e, 0x87, 0x0c, 0xf7, 0xbf, 0xaf, 0x6a, 0xb9, 0xe3, 0x95, 0xff, - 0xb9, 0xef, 0x43, 0x1e, 0xba, 0x09, 0x52, 0x4e, 0xb3, 0xf8, 0x1f, 0xf8, 0x4c, 0x17, 0x8d, 0xae, - 0xa5, 0x6c, 0x22, 0x65, 0x04, 0xbb, 0xf4, 0x36, 0xd9, 0x9a, 0x46, 0x13, 0x25, 0x13, 0xa9, 0x51, - 0xd9, 0x3a, 0x76, 0xe0, 0xa1, 0xcf, 0xc2, 0x04, 0x43, 0xa6, 0xc6, 0x8d, 0x2a, 0x9e, 0xcb, 0xd0, - 0x62, 0x8a, 0x6b, 0x78, 0x44, 0x1f, 0x92, 0xfb, 0xd3, 0x44, 0xaf, 0x90, 0x54, 0xb6, 0x15, 0x9a, - 0x1a, 0x57, 0xac, 0x95, 0x17, 0xc0, 0xe3, 0x9f, 0x26, 0x6b, 0xc3, 0x94, 0x71, 0xce, 0x33, 0x55, - 0xf6, 0xe8, 0x2e, 0xb9, 0x37, 0x4d, 0x76, 0x59, 0x29, 0xc8, 0x37, 0xba, 0xc5, 0xfe, 0x59, 0xd7, - 0x75, 0xdc, 0x20, 0x55, 0x0a, 0x85, 
0x19, 0x13, 0x9f, 0xd0, 0x07, 0x64, 0xe7, 0x2c, 0x22, 0x0b, - 0x82, 0x34, 0xb6, 0xd9, 0xca, 0xd1, 0xda, 0x29, 0xf8, 0xd4, 0x77, 0xc3, 0x04, 0x53, 0x47, 0x4c, - 0xd7, 0x2c, 0x36, 0x51, 0x18, 0x78, 0x36, 0x92, 0x18, 0xdb, 0x76, 0x3c, 0xa8, 0x23, 0x29, 0xaa, - 0x65, 0x29, 0xeb, 0xf0, 0xdc, 0x0f, 0xbb, 0x09, 0x54, 0xd7, 0xa4, 0x32, 0x19, 0xfc, 0x0b, 0x3f, - 0xec, 0x1c, 0xac, 0xd1, 0x98, 0x08, 0x63, 0xe7, 0xf3, 0x97, 0x6e, 0xea, 0x7b, 0x73, 0xc2, 0x42, - 0xe5, 0xb7, 0x0c, 0xfc, 0x8a, 0x5e, 0x20, 0x6b, 0xde, 0x6e, 0x5a, 0x2c, 0x81, 0x5f, 0x53, 0x20, - 0xeb, 0x23, 0xa2, 0x6b, 0x63, 0xf8, 0x8d, 0x6f, 0x87, 0x49, 0x8f, 0x16, 0x85, 0x51, 0x1d, 0xf8, - 0xad, 0xef, 0x5c, 0x07, 0x2a, 0xac, 0x86, 0xda, 0xa0, 0x42, 0x9e, 0x7d, 0x02, 0x5e, 0x14, 0x5c, - 0x49, 0xc5, 0x51, 0xc1, 0xef, 0xfc, 0x04, 0xcc, 0xee, 0xee, 0x66, 0x5d, 0x04, 0xbf, 0x1f, 0x55, - 0x0c, 0xb6, 0x9d, 0x54, 0x6e, 0x9e, 0x58, 0x16, 0x98, 0xb0, 0x89, 0xf9, 0x19, 0x0d, 0x7f, 0x28, - 0x44, 0xc4, 0xb4, 0x46, 0x63, 0xa3, 0x50, 0x1b, 0xf8, 0xa3, 0xaf, 0x6d, 0x67, 0x16, 0xd8, 0x36, - 0x39, 0xdd, 0x86, 0x1c, 0x58, 0x41, 0xa1, 0x0c, 0x29, 0xdc, 0x3a, 0xe4, 0x50, 0xa6, 0x57, 0xc9, - 0x86, 0x87, 0x63, 0x66, 0x82, 0x9a, 0x55, 0xa8, 0xd3, 0xc8, 0x40, 0xe0, 0xbb, 0x69, 0x2a, 0xd0, - 0xb1, 0x5f, 0x5e, 0xb8, 0x48, 0x6e, 0xcc, 0x14, 0x47, 0x3f, 0xc3, 0x59, 0x10, 0xa0, 0xd6, 0x59, - 0x4a, 0x64, 0x04, 0x55, 0xfa, 0x88, 0x3c, 0x98, 0xb6, 0x66, 0x8b, 0xd0, 0x72, 0x4c, 0xdc, 0xc2, - 0x17, 0x41, 0xc7, 0xc6, 0x2c, 0x49, 0x5c, 0x3b, 0xd6, 0xbc, 0x54, 0x19, 0x1e, 0x48, 0x8e, 0x10, - 0xfa, 0x22, 0xf0, 0x96, 0xa9, 0xe5, 0x7f, 0xe0, 0x65, 0x9f, 0x44, 0xf3, 0xd5, 0x53, 0xf7, 0xc2, - 0x64, 0x98, 0xc6, 0x46, 0xea, 0xd6, 0x7b, 0xd6, 0x7b, 0x91, 0x9f, 0x38, 0x93, 0xa7, 0xdc, 0xe7, - 0x7c, 0xe9, 0x77, 0x20, 0xf6, 0xc5, 0x39, 0x49, 0x29, 0x77, 0x72, 0x56, 0xc8, 0x41, 0x78, 0x71, - 0x33, 0x42, 0x12, 0x0a, 0x81, 0xdc, 0x63, 0xc2, 0x6d, 0x72, 0xe9, 0x3f, 0x91, 0xad, 0xc4, 0x6a, - 0x24, 0xcb, 0x79, 0x07, 0x64, 0x69, 0x15, 0x69, 0x5c, 0x46, 0x05, 0x89, 0x5f, 0xf6, 0x8e, 0xf2, - 0x1a, 0x1a, 0xbe, 0x00, 0x2b, 0x88, 0x55, 0xc5, 0x84, 0x01, 0xe5, 0x77, 0xd8, 0xc8, 0x60, 0x59, - 0x14, 0xc9, 0x96, 0x2b, 0x16, 0xd0, 0x9e, 0x9b, 0x35, 0x8b, 0x93, 0xcd, 0xf8, 0xe2, 0x19, 0x19, - 0xf2, 0x01, 0x1c, 0x56, 0xc5, 0xb8, 0xd7, 0x53, 0xdf, 0x96, 0x63, 0x86, 0x53, 0xd0, 0x26, 0x69, - 0xb9, 0x8e, 0x1d, 0xab, 0x30, 0xca, 0xa7, 0xad, 0x13, 0xa7, 0xe9, 0xd3, 0x98, 0x95, 0x05, 0xc6, - 0xbe, 0x62, 0x5b, 0x85, 0x9c, 0x3b, 0xab, 0xaf, 0xda, 0x76, 0xa1, 0x9d, 0x9c, 0x99, 0x63, 0x22, - 0x75, 0x68, 0xa0, 0x33, 0x1a, 0x99, 0x85, 0xe6, 0x84, 0xd7, 0x85, 0x06, 0x72, 0x6d, 0xec, 0x8b, - 0x27, 0x13, 0x05, 0xfe, 0x54, 0x68, 0xf6, 0xac, 0x8b, 0x27, 0xd0, 0x3f, 0x17, 0xdf, 0x07, 0xdc, - 0xbd, 0xd5, 0x14, 0x72, 0xf8, 0x0b, 0xdd, 0x21, 0xb7, 0xa7, 0xad, 0x36, 0x96, 0x3c, 0x8d, 0xd0, - 0x9a, 0xb6, 0x4f, 0x85, 0xdd, 0x5e, 0x58, 0x79, 0x09, 0x2f, 0xb7, 0x17, 0x56, 0x5e, 0xc1, 0xab, - 0xed, 0x85, 0x95, 0x0a, 0x54, 0x76, 0x1f, 0x11, 0xda, 0x3a, 0x1c, 0x1c, 0xc7, 0xbd, 0xc1, 0xe0, - 0xf0, 0x4d, 0x4f, 0x7f, 0x38, 0x1a, 0xba, 0x77, 0xde, 0x2a, 0x59, 0x6c, 0xa4, 0xa8, 0xdc, 0x4b, - 0x6f, 0x8d, 0x2c, 0x63, 0x1b, 0x83, 0xd4, 0x20, 0xcc, 0x96, 0x0f, 0xfe, 0xfb, 0xb9, 0x34, 0xfb, - 0xf1, 0x73, 0x69, 0xf6, 0xd3, 0xe7, 0xd2, 0xec, 0x7f, 0xbe, 0x94, 0x66, 0x3e, 0x7e, 0x29, 0xcd, - 0xfc, 0xef, 0x4b, 0x69, 0xe6, 0xf5, 0x93, 0x37, 0x6f, 0x87, 0x7f, 0xfb, 0x70, 0xb4, 0xd7, 0xed, - 0x1f, 0xef, 0xfb, 0x37, 0x7c, 0xfe, 0xe7, 0xf1, 0xe0, 0xaf, 0x7f, 0xdf, 0x77, 0x4e, 0xa7, 0x1e, - 0xf5, 0x47, 0x4b, 0xd9, 0x5b, 0xfe, 0xf9, 0xff, 0x03, 0x00, 
0x00, 0xff, 0xff, 0x19, 0x02, 0xe2, - 0x5e, 0xf3, 0x0b, 0x00, 0x00, + // 1487 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x57, 0xdb, 0x76, 0x1b, 0xb7, + 0x15, 0x95, 0x2c, 0x4a, 0xa2, 0x20, 0xd9, 0x3e, 0x86, 0xe5, 0xab, 0x64, 0xda, 0x91, 0xd5, 0xd8, + 0x55, 0x1c, 0x29, 0x8e, 0x7b, 0x4d, 0xda, 0xa6, 0xe0, 0xe0, 0x90, 0x1c, 0x71, 0x06, 0x18, 0x02, + 0x18, 0x5e, 0xdc, 0x36, 0xa8, 0xc4, 0x72, 0xa5, 0x59, 0x8d, 0x44, 0x2f, 0x91, 0xee, 0x6a, 0xbf, + 0xa1, 0x2f, 0xfd, 0x94, 0x7e, 0x46, 0x1f, 0xf3, 0xd8, 0xc7, 0x2e, 0xfb, 0x47, 0xba, 0x30, 0x03, + 0xd2, 0xc3, 0x89, 0x5c, 0x3f, 0x49, 0x3c, 0x7b, 0xe3, 0x0c, 0xce, 0x3e, 0xb7, 0x19, 0xb2, 0x3f, + 0x1c, 0x4f, 0xce, 0xc6, 0x93, 0xa3, 0x93, 0xe1, 0x70, 0x34, 0x99, 0x0c, 0xc7, 0xe7, 0xd3, 0x8b, + 0xf1, 0x77, 0x47, 0xc3, 0xf1, 0xf9, 0x64, 0x7a, 0x72, 0x3e, 0x9d, 0x1c, 0xbe, 0xba, 0x18, 0x4f, + 0xc7, 0x74, 0x37, 0x67, 0x1d, 0x2e, 0xb0, 0x0e, 0xff, 0xfa, 0xfc, 0x74, 0x34, 0x3d, 0x79, 0x7e, + 0xf0, 0x05, 0x21, 0x2c, 0x03, 0xcc, 0xdf, 0x5f, 0x8d, 0xe8, 0x26, 0x59, 0x4f, 0x45, 0x5b, 0xc8, + 0x9e, 0x80, 0x25, 0x5a, 0x25, 0x15, 0x85, 0x8c, 0xc3, 0x32, 0xdd, 0x20, 0xab, 0x3d, 0x15, 0x1a, + 0x84, 0x2b, 0x94, 0x90, 0xb5, 0x40, 0xc6, 0x71, 0x68, 0x60, 0xe5, 0xe0, 0x1f, 0x57, 0xc8, 0x4e, + 0x7e, 0x58, 0xbe, 0x1a, 0x5d, 0x9c, 0x4c, 0xbf, 0x1d, 0x9f, 0xeb, 0xd1, 0x77, 0xa3, 0xe1, 0x74, + 0x7c, 0x91, 0x79, 0xab, 0x92, 0x8a, 0x90, 0x02, 0x61, 0x89, 0xae, 0x91, 0x2b, 0xc7, 0x1d, 0x58, + 0xa6, 0xb7, 0xc8, 0x8d, 0xe3, 0x8e, 0xad, 0x63, 0xd0, 0x7a, 0xf1, 0xb9, 0x65, 0x9c, 0x2b, 0xd4, + 0x1a, 0xae, 0xd0, 0x1a, 0xb9, 0x7f, 0xdc, 0xb1, 0x11, 0x8a, 0xa6, 0x69, 0xd9, 0x44, 0x61, 0x23, + 0xec, 0x23, 0x9f, 0xe3, 0x2b, 0xf4, 0x1e, 0xb9, 0xa5, 0x51, 0x70, 0x54, 0xe5, 0xa3, 0x15, 0xba, + 0x47, 0x6a, 0x1e, 0x7a, 0xdf, 0xf1, 0x55, 0xba, 0x4d, 0x20, 0x90, 0xc2, 0x28, 0x16, 0x98, 0xb9, + 0x75, 0x8d, 0xde, 0x27, 0xb7, 0x8f, 0x3b, 0x36, 0x46, 0xad, 0x59, 0x13, 0x6d, 0x20, 0x05, 0x0f, + 0x4d, 0x28, 0x05, 0x8b, 0x60, 0xdd, 0x61, 0x81, 0x14, 0xda, 0x30, 0x61, 0xac, 0x36, 0x2a, 0x14, + 0x4d, 0x6b, 0xa4, 0x6d, 0x61, 0x1f, 0xaa, 0xf4, 0x36, 0xa1, 0x73, 0x6f, 0x0a, 0x1b, 0xa8, 0x50, + 0x04, 0x08, 0x1b, 0x07, 0xff, 0xda, 0x26, 0x5b, 0x6a, 0x34, 0x19, 0xbf, 0xbe, 0x18, 0x8e, 0xb2, + 0xf0, 0xd7, 0xc9, 0x0a, 0x13, 0x83, 0x3c, 0xfa, 0x76, 0x17, 0x96, 0x9d, 0x21, 0x1e, 0x9d, 0xe5, + 0x22, 0xf2, 0xd1, 0xdf, 0xdc, 0xff, 0x2b, 0x4e, 0xf2, 0x76, 0xd7, 0xd6, 0x99, 0x68, 0x43, 0x85, + 0x5e, 0x23, 0xa4, 0xdd, 0xb5, 0xda, 0xb0, 0x76, 0x28, 0x9a, 0xb0, 0xea, 0xc1, 0x1e, 0xd3, 0x31, + 0xac, 0xd1, 0xab, 0x64, 0xa3, 0xdd, 0xb5, 0x52, 0xb1, 0x20, 0x42, 0x58, 0x77, 0x4e, 0xda, 0x5d, + 0xcb, 0xb3, 0x3b, 0x6d, 0x91, 0x6a, 0xbb, 0x6b, 0x31, 0x91, 0x41, 0x0b, 0x36, 0xe8, 0x4d, 0x72, + 0xbd, 0xdd, 0xb5, 0x46, 0xb6, 0x51, 0x34, 0x58, 0x60, 0xa4, 0x1a, 0x00, 0x71, 0x21, 0xcd, 0x4f, + 0xdb, 0xae, 0x34, 0x68, 0x0d, 0x53, 0x4d, 0x34, 0x1a, 0x36, 0xe9, 0x03, 0x72, 0xef, 0x1d, 0xc6, + 0x9a, 0x4d, 0x85, 0x4d, 0x66, 0x72, 0x96, 0x86, 0x2d, 0x97, 0xb5, 0x77, 0x70, 0x03, 0x91, 0xa3, + 0xd2, 0x70, 0xd5, 0x65, 0xe5, 0xdd, 0x65, 0x2d, 0xc7, 0xc8, 0x9d, 0x0a, 0xa5, 0x80, 0x6b, 0xf4, + 0x2e, 0xd9, 0x2e, 0x40, 0x5d, 0x16, 0x85, 0x9c, 0x19, 0xa9, 0xe0, 0xba, 0x8f, 0x88, 0xa5, 0xa6, + 0x05, 0xe0, 0x3d, 0xb8, 0x1f, 0xb3, 0xbc, 0x58, 0x6d, 0xa4, 0x42, 0xb8, 0x41, 0x29, 0xb9, 0xe6, + 0x65, 0xb1, 0x3a, 0x4d, 0x92, 0x68, 0x00, 0x94, 0xde, 0x20, 0x57, 0x67, 0x36, 0x8e, 0x42, 0xc6, + 0x70, 0xd3, 0xa5, 0x76, 0x66, 0xaa, 0xb3, 0x88, 0x89, 0x00, 0x35, 0x6c, 0x7b, 0xbf, 0x45, 0x01, + 0xfc, 
0x81, 0x5b, 0x74, 0x97, 0xdc, 0x2d, 0x43, 0x31, 0x1a, 0xc6, 0x99, 0x61, 0x70, 0xfb, 0xb2, + 0x83, 0x8c, 0xc7, 0xa1, 0x80, 0x3b, 0x74, 0x87, 0xdc, 0x29, 0x43, 0x81, 0xc2, 0x2c, 0xaa, 0xbb, + 0x1e, 0xf4, 0x0a, 0x61, 0x3f, 0x68, 0x31, 0xd1, 0x44, 0xab, 0x98, 0x41, 0xb8, 0xe7, 0x4a, 0xb4, + 0xa4, 0x7c, 0x82, 0x82, 0x45, 0x66, 0x60, 0x03, 0x99, 0x0a, 0x83, 0x0a, 0xee, 0xfb, 0x6b, 0x79, + 0x4e, 0xa2, 0xc2, 0x00, 0xad, 0x16, 0x2c, 0xd1, 0x2d, 0x69, 0x60, 0x87, 0x3e, 0x24, 0x3b, 0x3f, + 0x94, 0x33, 0x94, 0xc2, 0x26, 0xb2, 0x87, 0x0a, 0x76, 0x7d, 0x72, 0x67, 0x04, 0x23, 0x0d, 0x8b, + 0x3c, 0xf6, 0xc0, 0x3f, 0xfe, 0x07, 0xb9, 0xd0, 0xae, 0xe4, 0x33, 0xd9, 0xa1, 0x46, 0x1f, 0x93, + 0x87, 0x05, 0x4e, 0x2a, 0xea, 0xae, 0x1b, 0x16, 0x93, 0xfa, 0x90, 0x3e, 0x21, 0x8f, 0x3f, 0x40, + 0x72, 0xde, 0xe1, 0x91, 0x57, 0x63, 0x46, 0x54, 0x58, 0xf0, 0xf2, 0x51, 0xe9, 0x51, 0x45, 0xd0, + 0x9d, 0xb6, 0x5a, 0x05, 0xb0, 0xf7, 0x21, 0x12, 0xd7, 0x06, 0x1e, 0xd3, 0x8f, 0xc8, 0x83, 0xf7, + 0x91, 0x3a, 0x29, 0xa6, 0x08, 0xfb, 0x6e, 0xb0, 0x5c, 0x16, 0xbb, 0xc7, 0x7f, 0x54, 0xc2, 0x5b, + 0xa1, 0xab, 0xbe, 0x30, 0x60, 0x91, 0x0d, 0x45, 0x43, 0xc2, 0xc7, 0xa5, 0x3a, 0x9e, 0x87, 0x0c, + 0x4f, 0xde, 0xaf, 0x6a, 0x7d, 0xe0, 0x95, 0xff, 0xb1, 0xef, 0x43, 0x1e, 0xba, 0x09, 0x52, 0x4f, + 0xb3, 0xf8, 0x9f, 0xfa, 0x4c, 0x17, 0x8d, 0xae, 0xa5, 0x6c, 0x22, 0x65, 0x04, 0x07, 0xf4, 0x11, + 0xd9, 0x2d, 0xa3, 0x89, 0x92, 0x89, 0xd4, 0xa8, 0x6c, 0x1b, 0x07, 0xf0, 0x89, 0xcf, 0xc2, 0x02, + 0x43, 0xa6, 0xc6, 0x8d, 0x2a, 0x9e, 0xcb, 0xd0, 0x63, 0x8a, 0x6b, 0x78, 0x46, 0x3f, 0x21, 0x4f, + 0xca, 0x44, 0xaf, 0x90, 0x54, 0xb6, 0x17, 0x9a, 0x16, 0x57, 0xac, 0x97, 0x17, 0xc0, 0xa7, 0xff, + 0x9f, 0xac, 0x0d, 0x53, 0xc6, 0x39, 0xcf, 0x54, 0x39, 0xa4, 0x07, 0xe4, 0xe3, 0x32, 0xd9, 0x65, + 0xa5, 0x20, 0xdf, 0xec, 0x16, 0x47, 0x97, 0x5d, 0xd7, 0x71, 0x83, 0x54, 0x29, 0x14, 0x66, 0x4e, + 0xfc, 0x8c, 0x3e, 0x25, 0xfb, 0x97, 0x11, 0x59, 0x10, 0xa4, 0xb1, 0xcd, 0x56, 0x8e, 0xd6, 0x4e, + 0xc1, 0xe7, 0xbe, 0x1b, 0x16, 0x98, 0x3a, 0x62, 0xba, 0x65, 0xb1, 0x8b, 0xc2, 0xc0, 0xe7, 0x33, + 0x89, 0xb1, 0x6f, 0xe7, 0x83, 0x3a, 0x92, 0xa2, 0x59, 0x97, 0xb2, 0x0d, 0x2f, 0xfc, 0xb0, 0x5b, + 0x40, 0x75, 0x4b, 0x2a, 0x93, 0xc1, 0x3f, 0xf1, 0xc3, 0xce, 0xc1, 0x1a, 0x8d, 0x89, 0x30, 0x76, + 0x3e, 0x7f, 0xea, 0xa6, 0xbe, 0x37, 0x27, 0x2c, 0x54, 0x7e, 0xcb, 0xc0, 0xcf, 0xe8, 0x75, 0xb2, + 0xe9, 0xed, 0xa6, 0xc7, 0x12, 0xf8, 0x39, 0x05, 0xb2, 0x35, 0x23, 0xba, 0x36, 0x86, 0x5f, 0xf8, + 0x76, 0x58, 0xf4, 0x68, 0x51, 0x18, 0x35, 0x80, 0x5f, 0xfa, 0xce, 0x75, 0xa0, 0xc2, 0x66, 0xa8, + 0x0d, 0x2a, 0xe4, 0xd9, 0x23, 0xe0, 0x8b, 0x82, 0x2b, 0xa9, 0x38, 0x2a, 0xf8, 0x95, 0x9f, 0x80, + 0xd9, 0xdd, 0xdd, 0xac, 0x8b, 0xe0, 0xd7, 0xb3, 0x8a, 0xc1, 0xbe, 0x93, 0xca, 0xcd, 0x13, 0xcb, + 0x02, 0x13, 0x76, 0x31, 0x3f, 0xa3, 0xe1, 0x37, 0x85, 0x88, 0x98, 0xd6, 0x68, 0x6c, 0x14, 0x6a, + 0x03, 0xbf, 0xf5, 0xb5, 0xed, 0xcc, 0x02, 0xfb, 0x26, 0xa7, 0xdb, 0x90, 0x03, 0x2b, 0x28, 0x94, + 0x21, 0x85, 0x5b, 0x87, 0x1c, 0xea, 0xf4, 0x0e, 0xb9, 0xe9, 0xe1, 0x98, 0x99, 0xa0, 0x65, 0x15, + 0xea, 0x34, 0x32, 0x10, 0xf8, 0x6e, 0x2a, 0x05, 0x3a, 0xf7, 0xcb, 0x0b, 0x17, 0xc9, 0x8d, 0x99, + 0xe2, 0xe8, 0x67, 0x38, 0x0b, 0x02, 0xd4, 0x3a, 0x4b, 0x89, 0x8c, 0xa0, 0x49, 0x9f, 0x91, 0xa7, + 0x65, 0x6b, 0xb6, 0x08, 0x2d, 0xc7, 0xc4, 0x2d, 0x7c, 0x11, 0x0c, 0x6c, 0xcc, 0x92, 0xc4, 0xb5, + 0x63, 0xcb, 0x4b, 0x95, 0xe1, 0x81, 0xe4, 0x08, 0xa1, 0x2f, 0x02, 0x6f, 0x29, 0x2d, 0xff, 0x63, + 0x2f, 0xfb, 0x22, 0x9a, 0xaf, 0x9e, 0xb6, 0x17, 0x26, 0xc3, 0x34, 0x76, 0x52, 0xb7, 0xde, 0xb3, + 0xde, 0x8b, 0xfc, 0xc4, 0x59, 
0x3c, 0xe5, 0x1e, 0xe7, 0x4b, 0x7f, 0x00, 0xb1, 0x2f, 0xce, 0x45, + 0x4a, 0x7d, 0x90, 0xb3, 0x42, 0x0e, 0xc2, 0x8b, 0x9b, 0x11, 0x92, 0x50, 0x08, 0xe4, 0x1e, 0x13, + 0x6e, 0x93, 0x4b, 0xff, 0x88, 0x6c, 0x25, 0x36, 0x23, 0x59, 0xcf, 0x3b, 0x20, 0x4b, 0xab, 0x48, + 0xe3, 0x3a, 0x2a, 0x48, 0xfc, 0xb2, 0x77, 0x94, 0x97, 0xd0, 0xf1, 0x05, 0xd8, 0x40, 0x6c, 0x2a, + 0x26, 0x0c, 0x28, 0xbf, 0xc3, 0x66, 0x06, 0xcb, 0xa2, 0x48, 0xf6, 0x5c, 0xb1, 0x80, 0xf6, 0xdc, + 0xac, 0x59, 0x9c, 0x6c, 0xc6, 0x17, 0xcf, 0xcc, 0x90, 0x0f, 0xe0, 0xb0, 0x29, 0xe6, 0xbd, 0x9e, + 0xfa, 0xb6, 0x9c, 0x33, 0x9c, 0x82, 0x36, 0x49, 0xeb, 0x6d, 0x1c, 0x58, 0x85, 0x51, 0x3e, 0x6d, + 0x9d, 0x38, 0x5d, 0x9f, 0xc6, 0xac, 0x2c, 0x30, 0xf6, 0x15, 0xdb, 0x2b, 0xe4, 0xdc, 0x59, 0x7d, + 0xd5, 0xf6, 0x0b, 0xed, 0xe4, 0xcc, 0x1c, 0x13, 0xa9, 0x43, 0x03, 0x83, 0xd9, 0xc8, 0x2c, 0x34, + 0x27, 0xbc, 0x2c, 0x34, 0x90, 0x6b, 0x63, 0x5f, 0x3c, 0x99, 0x28, 0xf0, 0xbb, 0x42, 0xb3, 0x67, + 0x5d, 0xbc, 0x80, 0xfe, 0xbe, 0xf8, 0x7e, 0xc0, 0xdd, 0xbb, 0x9a, 0x42, 0x0e, 0x7f, 0xa0, 0xfb, + 0xe4, 0x51, 0xd9, 0x6a, 0x63, 0xc9, 0xd3, 0x08, 0xad, 0xe9, 0xfb, 0x54, 0x58, 0xbf, 0x84, 0xe6, + 0x57, 0xf7, 0xf7, 0xd1, 0xee, 0x4d, 0x30, 0x51, 0xd2, 0xd5, 0x25, 0xfc, 0xb1, 0x44, 0xe2, 0xb2, + 0x27, 0xb4, 0x51, 0xc8, 0x0a, 0x7c, 0x38, 0xd9, 0xab, 0x54, 0xbf, 0x84, 0x2f, 0xf7, 0x2a, 0xd5, + 0xaf, 0xe0, 0xab, 0xbd, 0x4a, 0xb5, 0x01, 0x8d, 0xbd, 0x4a, 0xf5, 0x6b, 0xf8, 0xfa, 0xe0, 0x19, + 0xa1, 0xbd, 0x93, 0xc9, 0x59, 0x3c, 0x9a, 0x4c, 0x4e, 0xbe, 0x19, 0xe9, 0xd7, 0xa7, 0x53, 0xf7, + 0xde, 0xb8, 0x41, 0x56, 0x3b, 0x29, 0x2a, 0xf7, 0xe6, 0xb8, 0x49, 0xd6, 0xb1, 0x8f, 0x41, 0x6a, + 0x10, 0x96, 0xeb, 0xc7, 0xff, 0x7e, 0x53, 0x5b, 0xfe, 0xfe, 0x4d, 0x6d, 0xf9, 0xbf, 0x6f, 0x6a, + 0xcb, 0xff, 0x7c, 0x5b, 0x5b, 0xfa, 0xfe, 0x6d, 0x6d, 0xe9, 0x3f, 0x6f, 0x6b, 0x4b, 0x2f, 0x3f, + 0xfb, 0xe6, 0xdb, 0xe9, 0x9f, 0x5f, 0x9f, 0x1e, 0x0e, 0xc7, 0x67, 0x47, 0xfe, 0x9b, 0x20, 0xff, + 0xf3, 0xe9, 0xe4, 0x4f, 0x7f, 0x39, 0x72, 0x4e, 0x4b, 0x1f, 0x09, 0xa7, 0x6b, 0xd9, 0xb7, 0xc1, + 0x8b, 0xff, 0x05, 0x00, 0x00, 0xff, 0xff, 0x98, 0x43, 0x58, 0x88, 0x43, 0x0c, 0x00, 0x00, } diff --git a/types/accesscontrol/resource.go b/types/accesscontrol/resource.go index a1c4c7190..3ad944683 100644 --- a/types/accesscontrol/resource.go +++ b/types/accesscontrol/resource.go @@ -105,6 +105,8 @@ var ResourceTree = map[ResourceType]TreeNode{ ResourceType_KV_DEX_MEM_DEPOSIT, ResourceType_KV_DEX_LONG_ORDER_COUNT, ResourceType_KV_DEX_SHORT_ORDER_COUNT, + ResourceType_KV_DEX_MEM_CONTRACTS_TO_PROCESS, + ResourceType_KV_DEX_MEM_DOWNSTREAM_CONTRACTS, }}, ResourceType_KV_DEX_CONTRACT_LONGBOOK: {ResourceType_KV_DEX, []ResourceType{}}, ResourceType_KV_DEX_CONTRACT_SHORTBOOK: {ResourceType_KV_DEX, []ResourceType{}}, @@ -194,6 +196,8 @@ var ResourceTree = map[ResourceType]TreeNode{ ResourceType_KV_DEX_MEM_ORDER: {ResourceType_KV_DEX, []ResourceType{}}, ResourceType_KV_DEX_MEM_CANCEL: {ResourceType_KV_DEX, []ResourceType{}}, ResourceType_KV_DEX_MEM_DEPOSIT: {ResourceType_KV_DEX, []ResourceType{}}, + ResourceType_KV_DEX_MEM_CONTRACTS_TO_PROCESS: {ResourceType_KV_DEX, []ResourceType{}}, + ResourceType_KV_DEX_MEM_DOWNSTREAM_CONTRACTS: {ResourceType_KV_DEX, []ResourceType{}}, } // This returns a slice of all resource types that are dependent to a specific resource type From 62607323474f59b5971fa1566c9e55f7f6cae5d9 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Wed, 22 Nov 2023 13:03:37 -0500 Subject: [PATCH 22/65] [occ] OCC scheduler and validation fixes (#359) ## Describe your changes and provide context This 
makes optimizations to the scheduler and validation ## Testing performed to validate your change --------- Co-authored-by: Steven Landers --- store/cache/cache.go | 33 +++--- store/cachekv/store.go | 33 ++++-- store/multiversion/store.go | 13 ++- tasks/scheduler.go | 215 ++++++++++++++++++++++++------------ tasks/scheduler_test.go | 4 +- 5 files changed, 196 insertions(+), 102 deletions(-) diff --git a/store/cache/cache.go b/store/cache/cache.go index cbaeaeb86..b28675ba3 100644 --- a/store/cache/cache.go +++ b/store/cache/cache.go @@ -33,7 +33,7 @@ type ( // the same CommitKVStoreCache may be accessed concurrently by multiple // goroutines due to transaction parallelization - mtx sync.Mutex + mtx sync.RWMutex } // CommitKVStoreCacheManager maintains a mapping from a StoreKey to a @@ -102,27 +102,34 @@ func (ckv *CommitKVStoreCache) CacheWrap(storeKey types.StoreKey) types.CacheWra return cachekv.NewStore(ckv, storeKey, ckv.cacheKVSize) } +// getFromCache queries the write-through cache for a value by key. +func (ckv *CommitKVStoreCache) getFromCache(key []byte) ([]byte, bool) { + ckv.mtx.RLock() + defer ckv.mtx.RUnlock() + return ckv.cache.Get(string(key)) +} + +// getAndWriteToCache queries the underlying CommitKVStore and writes the result +func (ckv *CommitKVStoreCache) getAndWriteToCache(key []byte) []byte { + ckv.mtx.Lock() + defer ckv.mtx.Unlock() + value := ckv.CommitKVStore.Get(key) + ckv.cache.Add(string(key), value) + return value +} + // Get retrieves a value by key. It will first look in the write-through cache. // If the value doesn't exist in the write-through cache, the query is delegated // to the underlying CommitKVStore. func (ckv *CommitKVStoreCache) Get(key []byte) []byte { - ckv.mtx.Lock() - defer ckv.mtx.Unlock() - types.AssertValidKey(key) - keyStr := string(key) - value, ok := ckv.cache.Get(keyStr) - if ok { - // cache hit + if value, ok := ckv.getFromCache(key); ok { return value } - // cache miss; write to cache - value = ckv.CommitKVStore.Get(key) - ckv.cache.Add(keyStr, value) - - return value + // if not found in the cache, query the underlying CommitKVStore and init cache value + return ckv.getAndWriteToCache(key) } // Set inserts a key/value pair into both the write-through cache and the diff --git a/store/cachekv/store.go b/store/cachekv/store.go index f03ee517e..9a21b695c 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -56,7 +56,7 @@ func (b mapCacheBackend) Range(f func(string, *types.CValue) bool) { // Store wraps an in-memory cache around an underlying types.KVStore. type Store struct { - mtx sync.Mutex + mtx sync.RWMutex cache *types.BoundedCache deleted *sync.Map unsortedCache map[string]struct{} @@ -104,20 +104,33 @@ func (store *Store) GetStoreType() types.StoreType { return store.parent.GetStoreType() } -// Get implements types.KVStore. -func (store *Store) Get(key []byte) (value []byte) { +// getFromCache queries the write-through cache for a value by key. +func (store *Store) getFromCache(key []byte) ([]byte, bool) { + store.mtx.RLock() + defer store.mtx.RUnlock() + if cv, ok := store.cache.Get(conv.UnsafeBytesToStr(key)); ok { + return cv.Value(), true + } + return nil, false +} + +// getAndWriteToCache queries the underlying CommitKVStore and writes the result +func (store *Store) getAndWriteToCache(key []byte) []byte { store.mtx.Lock() defer store.mtx.Unlock() + value := store.parent.Get(key) + store.setCacheValue(key, value, false, false) + return value +} +// Get implements types.KVStore. 
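// The hunks above split the previously fully-locked Get into a read-locked cache lookup
// (getFromCache) plus a write-locked read-through to the parent (getAndWriteToCache),
// both for CommitKVStoreCache and for the cachekv Store. A minimal, generic sketch of
// that pattern follows; readThroughCache and its fields are illustrative names only,
// not types from this changeset (requires "sync").
type readThroughCache struct {
	mtx    sync.RWMutex
	values map[string][]byte
	parent interface{ Get(key []byte) []byte }
}

func (c *readThroughCache) Get(key []byte) []byte {
	// fast path: shared lock so concurrent readers do not serialize on cache hits
	c.mtx.RLock()
	v, ok := c.values[string(key)]
	c.mtx.RUnlock()
	if ok {
		return v
	}
	// miss: take the exclusive lock, read through to the parent and memoize the result
	// (two goroutines that miss the same key may both hit the parent, which is harmless)
	c.mtx.Lock()
	defer c.mtx.Unlock()
	if c.values == nil {
		c.values = make(map[string][]byte)
	}
	v = c.parent.Get(key)
	c.values[string(key)] = v
	return v
}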
+func (store *Store) Get(key []byte) (value []byte) { types.AssertValidKey(key) - cacheValue, ok := store.cache.Get(conv.UnsafeBytesToStr(key)) + value, ok := store.getFromCache(key) if !ok { // TODO: (occ) This is an example of when we fall through when we dont have a cache hit. Similarly, for mvkv, we'll try to serve reads from a local cache thats transient to the TX, and if its NOT present, then we read through AND mark the access (along with the value that was read) for validation - value = store.parent.Get(key) - store.setCacheValue(key, value, false, false) - } else { - value = cacheValue.Value() + value = store.getAndWriteToCache(key) } // TODO: (occ) This is an example of how we currently track accesses store.eventManager.EmitResourceAccessReadEvent("get", store.storeKey, key, value) @@ -140,8 +153,8 @@ func (store *Store) Set(key []byte, value []byte) { // Has implements types.KVStore. func (store *Store) Has(key []byte) bool { value := store.Get(key) - store.mtx.Lock() - defer store.mtx.Unlock() + store.mtx.RLock() + defer store.mtx.RUnlock() store.eventManager.EmitResourceAccessReadEvent("has", store.storeKey, key, value) return value != nil } diff --git a/store/multiversion/store.go b/store/multiversion/store.go index bc5e8ee4a..16fb04597 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -320,11 +320,11 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { } func (s *Store) checkIteratorAtIndex(index int) bool { - s.mtx.RLock() - defer s.mtx.RUnlock() - valid := true + s.mtx.RLock() iterateset := s.txIterateSets[index] + s.mtx.RUnlock() + for _, iterationTracker := range iterateset { iteratorValid := s.validateIterator(index, iterationTracker) valid = valid && iteratorValid @@ -333,11 +333,12 @@ func (s *Store) checkIteratorAtIndex(index int) bool { } func (s *Store) checkReadsetAtIndex(index int) (bool, []int) { - s.mtx.RLock() - defer s.mtx.RUnlock() - conflictSet := make(map[int]struct{}) + + s.mtx.RLock() readset := s.txReadSets[index] + s.mtx.RUnlock() + valid := true // iterate over readset and check if the value is the same as the latest value relateive to txIndex in the multiversion store diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 7b1afc0d2..c00e70dbe 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "fmt" "sort" + "sync" "github.com/tendermint/tendermint/abci/types" "go.opentelemetry.io/otel/attribute" @@ -38,7 +39,6 @@ const ( type deliverTxTask struct { Ctx sdk.Context - Span trace.Span AbortCh chan occ.Abort Status status @@ -49,10 +49,10 @@ type deliverTxTask struct { Request types.RequestDeliverTx Response *types.ResponseDeliverTx VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore + ValidateCh chan struct{} } -func (dt *deliverTxTask) Increment() { - dt.Incarnation++ +func (dt *deliverTxTask) Reset() { dt.Status = statusPending dt.Response = nil dt.Abort = nil @@ -61,6 +61,11 @@ func (dt *deliverTxTask) Increment() { dt.VersionStores = nil } +func (dt *deliverTxTask) Increment() { + dt.Incarnation++ + dt.ValidateCh = make(chan struct{}, 1) +} + // Scheduler processes tasks concurrently type Scheduler interface { ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) @@ -71,6 +76,7 @@ type scheduler struct { workers int multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore tracingInfo *tracing.Info + allTasks []*deliverTxTask } // NewScheduler creates a new scheduler @@ -111,9 +117,10 @@ 
func toTasks(reqs []*sdk.DeliverTxEntry) []*deliverTxTask { res := make([]*deliverTxTask, 0, len(reqs)) for idx, r := range reqs { res = append(res, &deliverTxTask{ - Request: r.Request, - Index: idx, - Status: statusPending, + Request: r.Request, + Index: idx, + Status: statusPending, + ValidateCh: make(chan struct{}, 1), }) } return res @@ -175,6 +182,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // prefill estimates s.PrefillEstimates(ctx, reqs) tasks := toTasks(reqs) + s.allTasks = tasks toExecute := tasks for !allValidated(tasks) { var err error @@ -193,9 +201,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t if err != nil { return nil, err } - for _, t := range toExecute { - t.Increment() - } } for _, mv := range s.multiVersionStores { mv.WriteLatestToStore() @@ -203,52 +208,83 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return collectResponses(tasks), nil } -func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { - spanCtx, span := s.tracingInfo.StartWithContext("SchedulerValidate", ctx.TraceSpanContext()) - ctx = ctx.WithTraceSpanContext(spanCtx) +func (s *scheduler) shouldRerun(task *deliverTxTask) bool { + switch task.Status { + + case statusAborted, statusPending: + return true + + // validated tasks can become unvalidated if an earlier re-run task now conflicts + case statusExecuted, statusValidated: + if valid, conflicts := s.findConflicts(task); !valid { + s.invalidateTask(task) + + // if the conflicts are now validated, then rerun this task + if indexesValidated(s.allTasks, conflicts) { + return true + } else { + // otherwise, wait for completion + task.Dependencies = conflicts + task.Status = statusWaiting + return false + } + } else if len(conflicts) == 0 { + // mark as validated, which will avoid re-validating unless a lower-index re-validates + task.Status = statusValidated + return false + } + // conflicts and valid, so it'll validate next time + return false + + case statusWaiting: + // if conflicts are done, then this task is ready to run again + return indexesValidated(s.allTasks, task.Dependencies) + } + panic("unexpected status: " + task.Status) +} + +func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { + _, span := s.traceSpan(ctx, "SchedulerValidate", task) defer span.End() - var res []*deliverTxTask + if s.shouldRerun(task) { + return false + } + return true +} - // find first non-validated entry - var startIdx int - for idx, t := range tasks { +func (s *scheduler) findFirstNonValidated() (int, bool) { + for i, t := range s.allTasks { if t.Status != statusValidated { - startIdx = idx - break + return i, true } } + return 0, false +} - for i := startIdx; i < len(tasks); i++ { - switch tasks[i].Status { - case statusAborted: - // aborted means it can be re-run immediately - res = append(res, tasks[i]) - - // validated tasks can become unvalidated if an earlier re-run task now conflicts - case statusExecuted, statusValidated: - if valid, conflicts := s.findConflicts(tasks[i]); !valid { - s.invalidateTask(tasks[i]) - - // if the conflicts are now validated, then rerun this task - if indexesValidated(tasks, conflicts) { - res = append(res, tasks[i]) - } else { - // otherwise, wait for completion - tasks[i].Dependencies = conflicts - tasks[i].Status = statusWaiting - } - } else if len(conflicts) == 0 { - tasks[i].Status = statusValidated - } // TODO: do we need to have handling for 
conflicts existing here? - - case statusWaiting: - // if conflicts are done, then this task is ready to run again - if indexesValidated(tasks, tasks[i].Dependencies) { - res = append(res, tasks[i]) +func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { + ctx, span := s.traceSpan(ctx, "SchedulerValidateAll", nil) + defer span.End() + + var mx sync.Mutex + var res []*deliverTxTask + + wg := sync.WaitGroup{} + for i := 0; i < len(tasks); i++ { + wg.Add(1) + go func(task *deliverTxTask) { + defer wg.Done() + if !s.validateTask(ctx, task) { + task.Reset() + task.Increment() + mx.Lock() + res = append(res, task) + mx.Unlock() } - } + }(tasks[i]) } + wg.Wait() + return res, nil } @@ -256,6 +292,9 @@ func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*del // Tasks are updated with their status // TODO: error scenarios func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { + ctx, span := s.traceSpan(ctx, "SchedulerExecuteAll", nil) + defer span.End() + ch := make(chan *deliverTxTask, len(tasks)) grp, gCtx := errgroup.WithContext(ctx.Context()) @@ -265,6 +304,15 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { workers = len(tasks) } + // validationWg waits for all validations to complete + // validations happen in separate goroutines in order to wait on previous index + validationWg := &sync.WaitGroup{} + validationWg.Add(len(tasks)) + grp.Go(func() error { + validationWg.Wait() + return nil + }) + for i := 0; i < workers; i++ { grp.Go(func() error { for { @@ -275,24 +323,16 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { if !ok { return nil } - s.executeTask(task) + s.prepareAndRunTask(validationWg, ctx, task) } } }) } - grp.Go(func() error { - defer close(ch) - for _, task := range tasks { - s.prepareTask(ctx, task) - - select { - case <-gCtx.Done(): - return gCtx.Err() - case ch <- task: - } - } - return nil - }) + + for _, task := range tasks { + ch <- task + } + close(ch) if err := grp.Wait(); err != nil { return err @@ -301,16 +341,46 @@ func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { return nil } +func (s *scheduler) prepareAndRunTask(wg *sync.WaitGroup, ctx sdk.Context, task *deliverTxTask) { + eCtx, eSpan := s.traceSpan(ctx, "SchedulerExecute", task) + defer eSpan.End() + task.Ctx = eCtx + + s.executeTask(task.Ctx, task) + go func() { + defer wg.Done() + defer close(task.ValidateCh) + // wait on previous task to finish validation + if task.Index > 0 { + <-s.allTasks[task.Index-1].ValidateCh + } + if !s.validateTask(task.Ctx, task) { + task.Reset() + } + task.ValidateCh <- struct{}{} + }() +} + +func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) (sdk.Context, trace.Span) { + spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) + if task != nil { + span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) + span.SetAttributes(attribute.Int("txIndex", task.Index)) + span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) + } + ctx = ctx.WithTraceSpanContext(spanCtx) + return ctx, span +} + // prepareTask initializes the context and version stores for a task func (s *scheduler) prepareTask(ctx sdk.Context, task *deliverTxTask) { - // initialize the context ctx = ctx.WithTxIndex(task.Index) + + _, span := s.traceSpan(ctx, "SchedulerPrepare", task) + defer span.End() + + // initialize the context 
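	// the abort channel below is buffered to the number of multiversion stores, so a
	// wrapped store can report an estimate-read abort without blocking; each store is
	// then wrapped in a tx-scoped VersionIndexedStore (keyed by store key) that records
	// this task's reads and writes for later validation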
abortCh := make(chan occ.Abort, len(s.multiVersionStores)) - spanCtx, span := s.tracingInfo.StartWithContext("SchedulerExecute", ctx.TraceSpanContext()) - span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) - span.SetAttributes(attribute.Int("txIndex", task.Index)) - span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) - ctx = ctx.WithTraceSpanContext(spanCtx) // if there are no stores, don't try to wrap, because there's nothing to wrap if len(s.multiVersionStores) > 0 { @@ -334,14 +404,17 @@ func (s *scheduler) prepareTask(ctx sdk.Context, task *deliverTxTask) { task.AbortCh = abortCh task.Ctx = ctx - task.Span = span } // executeTask executes a single task -func (s *scheduler) executeTask(task *deliverTxTask) { - if task.Span != nil { - defer task.Span.End() - } +func (s *scheduler) executeTask(ctx sdk.Context, task *deliverTxTask) { + + s.prepareTask(ctx, task) + + dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerDeliverTx", task) + defer dSpan.End() + task.Ctx = dCtx + resp := s.deliverTx(task.Ctx, task.Request) close(task.AbortCh) diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index accc8bf3e..9d24b54a8 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -66,7 +66,7 @@ func TestProcessAll(t *testing.T) { { name: "Test every tx accesses same key", workers: 50, - runs: 25, + runs: 50, addStores: true, requests: requestList(50), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { @@ -94,7 +94,7 @@ func TestProcessAll(t *testing.T) { } // confirm last write made it to the parent store latest := ctx.MultiStore().GetKVStore(testStoreKey).Get(itemKey) - require.Equal(t, []byte("49"), latest) + require.Equal(t, []byte(fmt.Sprintf("%d", len(res)-1)), latest) }, expectedErr: nil, }, From c660786349d50ad0978132660f3ee1318ec4ae38 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 27 Nov 2023 09:56:37 -0500 Subject: [PATCH 23/65] [occ] Add optimizations for multiversion and mvkv (#361) ## Describe your changes and provide context Add optimizations to reduce mutex lock contention and refactor with sync Maps. This also removes telemetry that was added liberally, and we can later add in telemetry more mindfully and feature flagged. ## Testing performed to validate your change loadtest chain testing --- baseapp/baseapp.go | 14 +- store/cache/cache.go | 4 +- store/cachekv/search_benchmark_test.go | 13 +- store/cachekv/store.go | 189 +++++---------------- store/multiversion/mvkv.go | 53 +++--- store/multiversion/store.go | 219 +++++++++++++------------ store/types/cache.go | 8 +- tasks/scheduler.go | 2 + 8 files changed, 205 insertions(+), 297 deletions(-) diff --git a/baseapp/baseapp.go b/baseapp/baseapp.go index 2fd2e89f8..3af7c1098 100644 --- a/baseapp/baseapp.go +++ b/baseapp/baseapp.go @@ -867,13 +867,13 @@ func (app *BaseApp) cacheTxContext(ctx sdk.Context, txBytes []byte) (sdk.Context // and execute successfully. An error is returned otherwise. 
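// The hunk below comments out the per-tx throughput telemetry; per the commit message,
// telemetry could be reintroduced later behind a feature flag. One possible shape for
// that, reusing the same telemetry call that is being disabled (enableTxTelemetry and
// measureTx are hypothetical names, not part of this changeset):
var enableTxTelemetry bool

// measureTx returns a no-op unless tx telemetry is enabled; the intended usage inside
// runTx would be `defer measureTx(mode)()`.
func measureTx(mode runTxMode) func() {
	if !enableTxTelemetry {
		return func() {}
	}
	start := time.Now()
	return func() {
		telemetry.MeasureThroughputSinceWithLabels(
			telemetry.TxCount,
			[]metrics.Label{telemetry.NewLabel("mode", modeKeyToString[mode])},
			start,
		)
	}
}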
func (app *BaseApp) runTx(ctx sdk.Context, mode runTxMode, txBytes []byte) (gInfo sdk.GasInfo, result *sdk.Result, anteEvents []abci.Event, priority int64, err error) { - defer telemetry.MeasureThroughputSinceWithLabels( - telemetry.TxCount, - []metrics.Label{ - telemetry.NewLabel("mode", modeKeyToString[mode]), - }, - time.Now(), - ) + // defer telemetry.MeasureThroughputSinceWithLabels( + // telemetry.TxCount, + // []metrics.Label{ + // telemetry.NewLabel("mode", modeKeyToString[mode]), + // }, + // time.Now(), + // ) // Reset events after each checkTx or simulateTx or recheckTx // DeliverTx is garbage collected after FinalizeBlocker diff --git a/store/cache/cache.go b/store/cache/cache.go index b28675ba3..1d4054653 100644 --- a/store/cache/cache.go +++ b/store/cache/cache.go @@ -111,8 +111,8 @@ func (ckv *CommitKVStoreCache) getFromCache(key []byte) ([]byte, bool) { // getAndWriteToCache queries the underlying CommitKVStore and writes the result func (ckv *CommitKVStoreCache) getAndWriteToCache(key []byte) []byte { - ckv.mtx.Lock() - defer ckv.mtx.Unlock() + ckv.mtx.RLock() + defer ckv.mtx.RUnlock() value := ckv.CommitKVStore.Get(key) ckv.cache.Add(string(key), value) return value diff --git a/store/cachekv/search_benchmark_test.go b/store/cachekv/search_benchmark_test.go index d31b0218f..dde9cf6ca 100644 --- a/store/cachekv/search_benchmark_test.go +++ b/store/cachekv/search_benchmark_test.go @@ -2,6 +2,7 @@ package cachekv import ( "strconv" + "sync" "testing" "github.com/cosmos/cosmos-sdk/store/types" @@ -23,18 +24,18 @@ func BenchmarkLargeUnsortedMisses(b *testing.B) { } func generateStore() *Store { - cache := types.NewBoundedCache(mapCacheBackend{make(map[string]*types.CValue)}, types.DefaultCacheSizeLimit) - unsorted := map[string]struct{}{} + cache := &sync.Map{} + unsorted := &sync.Map{} for i := 0; i < 5000; i++ { key := "A" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache.CacheBackend.Set(key, &types.CValue{}) + unsorted.Store(key, struct{}{}) + cache.Store(key, &types.CValue{}) } for i := 0; i < 5000; i++ { key := "Z" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache.CacheBackend.Set(key, &types.CValue{}) + unsorted.Store(key, struct{}{}) + cache.Store(key, &types.CValue{}) } return &Store{ diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 9a21b695c..83bd9204a 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -5,61 +5,23 @@ import ( "io" "sort" "sync" - "time" "github.com/cosmos/cosmos-sdk/internal/conv" "github.com/cosmos/cosmos-sdk/store/listenkv" "github.com/cosmos/cosmos-sdk/store/tracekv" "github.com/cosmos/cosmos-sdk/store/types" - "github.com/cosmos/cosmos-sdk/telemetry" sdktypes "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/types/kv" abci "github.com/tendermint/tendermint/abci/types" - "github.com/tendermint/tendermint/libs/math" dbm "github.com/tendermint/tm-db" ) -type mapCacheBackend struct { - m map[string]*types.CValue -} - -func (b mapCacheBackend) Get(key string) (val *types.CValue, ok bool) { - val, ok = b.m[key] - return -} - -func (b mapCacheBackend) Set(key string, val *types.CValue) { - b.m[key] = val -} - -func (b mapCacheBackend) Len() int { - return len(b.m) -} - -func (b mapCacheBackend) Delete(key string) { - delete(b.m, key) -} - -func (b mapCacheBackend) Range(f func(string, *types.CValue) bool) { - // this is always called within a mutex so all operations below are atomic - keys := []string{} - for k := range b.m { - keys = append(keys, k) - } - for _, key := range keys { - val, _ 
:= b.Get(key) - if !f(key, val) { - break - } - } -} - // Store wraps an in-memory cache around an underlying types.KVStore. type Store struct { mtx sync.RWMutex - cache *types.BoundedCache + cache *sync.Map deleted *sync.Map - unsortedCache map[string]struct{} + unsortedCache *sync.Map sortedCache *dbm.MemDB // always ascending sorted parent types.KVStore eventManager *sdktypes.EventManager @@ -72,9 +34,9 @@ var _ types.CacheKVStore = (*Store)(nil) // NewStore creates a new Store object func NewStore(parent types.KVStore, storeKey types.StoreKey, cacheSize int) *Store { return &Store{ - cache: types.NewBoundedCache(mapCacheBackend{make(map[string]*types.CValue)}, cacheSize), + cache: &sync.Map{}, deleted: &sync.Map{}, - unsortedCache: make(map[string]struct{}), + unsortedCache: &sync.Map{}, sortedCache: dbm.NewMemDB(), parent: parent, eventManager: sdktypes.NewEventManager(), @@ -94,8 +56,6 @@ func (store *Store) GetEvents() []abci.Event { // Implements Store func (store *Store) ResetEvents() { - store.mtx.Lock() - defer store.mtx.Unlock() store.eventManager = sdktypes.NewEventManager() } @@ -105,13 +65,12 @@ func (store *Store) GetStoreType() types.StoreType { } // getFromCache queries the write-through cache for a value by key. -func (store *Store) getFromCache(key []byte) ([]byte, bool) { - store.mtx.RLock() - defer store.mtx.RUnlock() - if cv, ok := store.cache.Get(conv.UnsafeBytesToStr(key)); ok { - return cv.Value(), true +func (store *Store) getFromCache(key []byte) []byte { + if cv, ok := store.cache.Load(conv.UnsafeBytesToStr(key)); ok { + return cv.(*types.CValue).Value() + } else { + return store.parent.Get(key) } - return nil, false } // getAndWriteToCache queries the underlying CommitKVStore and writes the result @@ -126,69 +85,44 @@ func (store *Store) getAndWriteToCache(key []byte) []byte { // Get implements types.KVStore. func (store *Store) Get(key []byte) (value []byte) { types.AssertValidKey(key) - - value, ok := store.getFromCache(key) - if !ok { - // TODO: (occ) This is an example of when we fall through when we dont have a cache hit. Similarly, for mvkv, we'll try to serve reads from a local cache thats transient to the TX, and if its NOT present, then we read through AND mark the access (along with the value that was read) for validation - value = store.getAndWriteToCache(key) - } - // TODO: (occ) This is an example of how we currently track accesses - store.eventManager.EmitResourceAccessReadEvent("get", store.storeKey, key, value) - - return value + return store.getFromCache(key) } // Set implements types.KVStore. func (store *Store) Set(key []byte, value []byte) { - store.mtx.Lock() - defer store.mtx.Unlock() - types.AssertValidKey(key) types.AssertValidValue(value) - store.setCacheValue(key, value, false, true) - store.eventManager.EmitResourceAccessWriteEvent("set", store.storeKey, key, value) } // Has implements types.KVStore. func (store *Store) Has(key []byte) bool { value := store.Get(key) - store.mtx.RLock() - defer store.mtx.RUnlock() - store.eventManager.EmitResourceAccessReadEvent("has", store.storeKey, key, value) return value != nil } // Delete implements types.KVStore. func (store *Store) Delete(key []byte) { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "cachekv", "delete") - types.AssertValidKey(key) store.setCacheValue(key, nil, true, true) - store.eventManager.EmitResourceAccessWriteEvent("delete", store.storeKey, key, []byte{}) } // Implements Cachetypes.KVStore. 
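// Write below snapshots the dirty keys out of the sync.Map and sorts them so writes
// reach the parent store in a deterministic order. The same idiom in isolation
// (sortedKeys is an illustrative helper, not part of the diff; requires "sort" and "sync"):
func sortedKeys(m *sync.Map) []string {
	keys := []string{}
	m.Range(func(k, _ any) bool {
		keys = append(keys, k.(string))
		return true
	})
	sort.Strings(keys)
	return keys
}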
func (store *Store) Write() { store.mtx.Lock() defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "cachekv", "write") // We need a copy of all of the keys. // Not the best, but probably not a bottleneck depending. - keys := make([]string, 0, store.cache.Len()) + keys := []string{} - store.cache.Range(func(key string, dbValue *types.CValue) bool { - if dbValue.Dirty() { - keys = append(keys, key) + store.cache.Range(func(key, value any) bool { + if value.(*types.CValue).Dirty() { + keys = append(keys, key.(string)) } return true }) - sort.Strings(keys) - // TODO: Consider allowing usage of Batch, which would allow the write to // at least happen atomically. for _, key := range keys { @@ -201,24 +135,28 @@ func (store *Store) Write() { continue } - cacheValue, _ := store.cache.Get(key) - if cacheValue.Value() != nil { + cacheValue, _ := store.cache.Load(key) + if cacheValue.(*types.CValue).Value() != nil { // It already exists in the parent, hence delete it. - store.parent.Set([]byte(key), cacheValue.Value()) + store.parent.Set([]byte(key), cacheValue.(*types.CValue).Value()) } } // Clear the cache using the map clearing idiom // and not allocating fresh objects. // Please see https://bencher.orijtech.com/perfclinic/mapclearing/ - store.cache.DeleteAll() + store.cache.Range(func(key, value any) bool { + store.cache.Delete(key) + return true + }) store.deleted.Range(func(key, value any) bool { store.deleted.Delete(key) return true }) - for key := range store.unsortedCache { - delete(store.unsortedCache, key) - } + store.unsortedCache.Range(func(key, value any) bool { + store.deleted.Delete(key) + return true + }) store.sortedCache = dbm.NewMemDB() } @@ -253,7 +191,6 @@ func (store *Store) ReverseIterator(start, end []byte) types.Iterator { func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { store.mtx.Lock() defer store.mtx.Unlock() - // TODO: (occ) Note that for iterators, we'll need to have special handling (discussed in RFC) to ensure proper validation var parent, cache types.Iterator @@ -367,7 +304,6 @@ func (store *Store) dirtyItems(start, end []byte) { return } - n := len(store.unsortedCache) unsorted := make([]*kv.Pair, 0) // If the unsortedCache is too big, its costs too much to determine // whats in the subset we are concerned about. @@ -375,54 +311,25 @@ func (store *Store) dirtyItems(start, end []byte) { // O(N^2) overhead. // Even without that, too many range checks eventually becomes more expensive // than just not having the cache. - store.emitUnsortedCacheSizeMetric() - if n < minSortSize { - for key := range store.unsortedCache { - if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { - cacheValue, _ := store.cache.Get(key) - unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.Value()}) + // store.emitUnsortedCacheSizeMetric() + // TODO: do we need to check the size of the unsortedCache? + store.unsortedCache.Range(func(key, value any) bool { + cKey := key.(string) + if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(cKey), start, end) { + cacheValue, found := store.cache.Load(key) + if found { //TODO: is this correct? + unsorted = append(unsorted, &kv.Pair{Key: []byte(cKey), Value: cacheValue.(*types.CValue).Value()}) } } - store.clearUnsortedCacheSubset(unsorted, stateUnsorted) - return - } - - // Otherwise it is large so perform a modified binary search to find - // the target ranges for the keys that we should be looking for. 
- strL := make([]string, 0, n) - for key := range store.unsortedCache { - strL = append(strL, key) - } - sort.Strings(strL) - - startIndex, endIndex := findStartEndIndex(strL, startStr, endStr) - - // Since we spent cycles to sort the values, we should process and remove a reasonable amount - // ensure start to end is at least minSortSize in size - // if below minSortSize, expand it to cover additional values - // this amortizes the cost of processing elements across multiple calls - if endIndex-startIndex < minSortSize { - endIndex = math.MinInt(startIndex+minSortSize, len(strL)-1) - if endIndex-startIndex < minSortSize { - startIndex = math.MaxInt(endIndex-minSortSize, 0) - } - } - - kvL := make([]*kv.Pair, 0, 1+endIndex-startIndex) - for i := startIndex; i <= endIndex; i++ { - key := strL[i] - cacheValue, _ := store.cache.Get(key) - kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.Value()}) - } - - // kvL was already sorted so pass it in as is. - store.clearUnsortedCacheSubset(kvL, stateAlreadySorted) - store.emitUnsortedCacheSizeMetric() + return true + }) + store.clearUnsortedCacheSubset(unsorted, stateUnsorted) + return } func (store *Store) emitUnsortedCacheSizeMetric() { - n := len(store.unsortedCache) - telemetry.SetGauge(float32(n), "sei", "cosmos", "unsorted", "cache", "size") + // n := len(store.unsortedCache) + // telemetry.SetGauge(float32(n), "sei", "cosmos", "unsorted", "cache", "size") } func findStartEndIndex(strL []string, startStr, endStr string) (int, int) { @@ -466,18 +373,10 @@ func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sort } func (store *Store) deleteKeysFromUnsortedCache(unsorted []*kv.Pair) { - n := len(store.unsortedCache) - store.emitUnsortedCacheSizeMetric() - if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. - for key := range store.unsortedCache { - delete(store.unsortedCache, key) - } - } else { // Otherwise, normally delete the unsorted keys from the map. 
- for _, kv := range unsorted { - delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) - } + for _, kv := range unsorted { + keyStr := conv.UnsafeBytesToStr(kv.Key) + store.unsortedCache.Delete(keyStr) } - defer store.emitUnsortedCacheSizeMetric() } //---------------------------------------- @@ -488,14 +387,14 @@ func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) { types.AssertValidKey(key) keyStr := conv.UnsafeBytesToStr(key) - store.cache.Set(keyStr, types.NewCValue(value, dirty)) + store.cache.Store(keyStr, types.NewCValue(value, dirty)) if deleted { store.deleted.Store(keyStr, struct{}{}) } else { store.deleted.Delete(keyStr) } if dirty { - store.unsortedCache[keyStr] = struct{}{} + store.unsortedCache.Store(keyStr, struct{}{}) } } diff --git a/store/multiversion/mvkv.go b/store/multiversion/mvkv.go index 6eeabd517..1e8437ad7 100644 --- a/store/multiversion/mvkv.go +++ b/store/multiversion/mvkv.go @@ -3,13 +3,10 @@ package multiversion import ( "io" "sort" - "sync" - "time" abci "github.com/tendermint/tendermint/abci/types" "github.com/cosmos/cosmos-sdk/store/types" - "github.com/cosmos/cosmos-sdk/telemetry" scheduler "github.com/cosmos/cosmos-sdk/types/occ" dbm "github.com/tendermint/tm-db" ) @@ -72,7 +69,8 @@ func (item *iterationTracker) SetEarlyStopKey(key []byte) { // Version Indexed Store wraps the multiversion store in a way that implements the KVStore interface, but also stores the index of the transaction, and so store actions are applied to the multiversion store using that index type VersionIndexedStore struct { - mtx sync.Mutex + // TODO: this shouldnt NEED a mutex because its used within single transaction execution, therefore no concurrency + // mtx sync.Mutex // used for tracking reads and writes for eventual validation + persistence into multi-version store // TODO: does this need sync.Map? readset map[string][]byte // contains the key -> value mapping for all keys read from the store (not mvkv, underlying store) @@ -130,9 +128,10 @@ func (store *VersionIndexedStore) Get(key []byte) []byte { // if the key is in the cache, return it // don't have RW mutex because we have to update readset - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "get") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "get") types.AssertValidKey(key) strKey := string(key) @@ -176,9 +175,10 @@ func (store *VersionIndexedStore) parseValueAndUpdateReadset(strKey string, mvsV // This function iterates over the readset, validating that the values in the readset are consistent with the values in the multiversion store and underlying parent store, and returns a boolean indicating validity func (store *VersionIndexedStore) ValidateReadset() bool { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "validate_readset") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "validate_readset") // sort the readset keys - this is so we have consistent behavior when theres varying conflicts within the readset (eg. read conflict vs estimate) readsetKeys := make([]string, 0, len(store.readset)) @@ -225,9 +225,10 @@ func (store *VersionIndexedStore) ValidateReadset() bool { // Delete implements types.KVStore. 
func (store *VersionIndexedStore) Delete(key []byte) { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "delete") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "delete") types.AssertValidKey(key) store.setValue(key, nil, true, true) @@ -241,9 +242,10 @@ func (store *VersionIndexedStore) Has(key []byte) bool { // Set implements types.KVStore. func (store *VersionIndexedStore) Set(key []byte, value []byte) { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "set") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "set") types.AssertValidKey(key) store.setValue(key, value, false, true) @@ -262,8 +264,9 @@ func (v *VersionIndexedStore) ReverseIterator(start []byte, end []byte) dbm.Iter // TODO: still needs iterateset tracking // Iterator implements types.KVStore. func (store *VersionIndexedStore) iterator(start []byte, end []byte, ascending bool) dbm.Iterator { - store.mtx.Lock() - defer store.mtx.Unlock() + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() // get the sorted keys from MVS // TODO: ideally we take advantage of mvs keys already being sorted @@ -334,18 +337,20 @@ func (store *VersionIndexedStore) setValue(key, value []byte, deleted bool, dirt } func (store *VersionIndexedStore) WriteToMultiVersionStore() { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") store.multiVersionStore.SetWriteset(store.transactionIndex, store.incarnation, store.writeset) store.multiVersionStore.SetReadset(store.transactionIndex, store.readset) store.multiVersionStore.SetIterateset(store.transactionIndex, store.iterateset) } func (store *VersionIndexedStore) WriteEstimatesToMultiVersionStore() { - store.mtx.Lock() - defer store.mtx.Unlock() - defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") + // TODO: remove? + // store.mtx.Lock() + // defer store.mtx.Unlock() + // defer telemetry.MeasureSince(time.Now(), "store", "mvkv", "write_mvs") store.multiVersionStore.SetEstimatedWriteset(store.transactionIndex, store.incarnation, store.writeset) // TODO: do we need to write readset and iterateset in this case? 
I don't think so since if this is called it means we aren't doing validation } diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 16fb04597..16b0e626b 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -4,10 +4,8 @@ import ( "bytes" "sort" "sync" - "time" "github.com/cosmos/cosmos-sdk/store/types" - "github.com/cosmos/cosmos-sdk/telemetry" "github.com/cosmos/cosmos-sdk/types/occ" occtypes "github.com/cosmos/cosmos-sdk/types/occ" db "github.com/tendermint/tm-db" @@ -25,9 +23,11 @@ type MultiVersionStore interface { CollectIteratorItems(index int) *db.MemDB SetReadset(index int, readset ReadSet) GetReadset(index int) ReadSet + ClearReadset(index int) VersionedIndexedStore(index int, incarnation int, abortChannel chan occ.Abort) *VersionIndexedStore SetIterateset(index int, iterateset Iterateset) GetIterateset(index int) Iterateset + ClearIterateset(index int) ValidateTransactionState(index int) (bool, []int) } @@ -38,24 +38,23 @@ type Iterateset []iterationTracker var _ MultiVersionStore = (*Store)(nil) type Store struct { - mtx sync.RWMutex - // map that stores the key -> MultiVersionValue mapping for accessing from a given key - multiVersionMap map[string]MultiVersionValue + // map that stores the key string -> MultiVersionValue mapping for accessing from a given key + multiVersionMap *sync.Map // TODO: do we need to support iterators as well similar to how cachekv does it - yes - txWritesetKeys map[int][]string // map of tx index -> writeset keys - txReadSets map[int]ReadSet - txIterateSets map[int]Iterateset + txWritesetKeys *sync.Map // map of tx index -> writeset keys []string + txReadSets *sync.Map // map of tx index -> readset ReadSet + txIterateSets *sync.Map // map of tx index -> iterateset Iterateset parentStore types.KVStore } func NewMultiVersionStore(parentStore types.KVStore) *Store { return &Store{ - multiVersionMap: make(map[string]MultiVersionValue), - txWritesetKeys: make(map[int][]string), - txReadSets: make(map[int]ReadSet), - txIterateSets: make(map[int]Iterateset), + multiVersionMap: &sync.Map{}, + txWritesetKeys: &sync.Map{}, + txReadSets: &sync.Map{}, + txIterateSets: &sync.Map{}, parentStore: parentStore, } } @@ -67,32 +66,28 @@ func (s *Store) VersionedIndexedStore(index int, incarnation int, abortChannel c // GetLatest implements MultiVersionStore. func (s *Store) GetLatest(key []byte) (value MultiVersionValueItem) { - s.mtx.RLock() - defer s.mtx.RUnlock() - keyString := string(key) + mvVal, found := s.multiVersionMap.Load(keyString) // if the key doesn't exist in the overall map, return nil - if _, ok := s.multiVersionMap[keyString]; !ok { + if !found { return nil } - val, found := s.multiVersionMap[keyString].GetLatest() + latestVal, found := mvVal.(MultiVersionValue).GetLatest() if !found { return nil // this is possible IF there is are writeset that are then removed for that key } - return val + return latestVal } // GetLatestBeforeIndex implements MultiVersionStore. 
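// With the multiversion map now a sync.Map, every accessor follows the same idiom:
// Load (or LoadOrStore) the entry, then assert it back to MultiVersionValue. A small
// helper capturing the write-side form used by SetWriteset and SetEstimatedWriteset
// further down (loadOrInitItem is an illustrative name, not part of the diff):
func loadOrInitItem(m *sync.Map, key string) MultiVersionValue {
	// only the first item stored for a key is kept, even under concurrent writers;
	// later calls allocate a throwaway item and get back the existing one
	val, _ := m.LoadOrStore(key, NewMultiVersionItem())
	return val.(MultiVersionValue)
}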
func (s *Store) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) { - s.mtx.RLock() - defer s.mtx.RUnlock() - keyString := string(key) + mvVal, found := s.multiVersionMap.Load(keyString) // if the key doesn't exist in the overall map, return nil - if _, ok := s.multiVersionMap[keyString]; !ok { + if !found { return nil } - val, found := s.multiVersionMap[keyString].GetLatestBeforeIndex(index) + val, found := mvVal.(MultiVersionValue).GetLatestBeforeIndex(index) // otherwise, we may have found a value for that key, but its not written before the index passed in if !found { return nil @@ -103,24 +98,15 @@ func (s *Store) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionV // Has implements MultiVersionStore. It checks if the key exists in the multiversion store at or before the specified index. func (s *Store) Has(index int, key []byte) bool { - s.mtx.RLock() - defer s.mtx.RUnlock() keyString := string(key) - if _, ok := s.multiVersionMap[keyString]; !ok { + mvVal, found := s.multiVersionMap.Load(keyString) + // if the key doesn't exist in the overall map, return nil + if !found { return false // this is okay because the caller of this will THEN need to access the parent store to verify that the key doesnt exist there } - _, found := s.multiVersionMap[keyString].GetLatestBeforeIndex(index) - return found -} - -// This function will try to intialize the multiversion item if it doesn't exist for a key specified by byte array -// NOTE: this should be used within an acquired mutex lock -func (s *Store) tryInitMultiVersionItem(keyString string) { - if _, ok := s.multiVersionMap[keyString]; !ok { - multiVersionValue := NewMultiVersionItem() - s.multiVersionMap[keyString] = multiVersionValue - } + _, foundVal := mvVal.(MultiVersionValue).GetLatestBeforeIndex(index) + return foundVal } func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { @@ -130,7 +116,9 @@ func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { writeset = newWriteSet } // if there is already a writeset existing, we should remove that fully - if keys, ok := s.txWritesetKeys[index]; ok { + oldKeys, loaded := s.txWritesetKeys.LoadAndDelete(index) + if loaded { + keys := oldKeys.([]string) // we need to delete all of the keys in the writeset from the multiversion store for _, key := range keys { // small optimization to check if the new writeset is going to write this key, if so, we can leave it behind @@ -139,59 +127,57 @@ func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { continue } // remove from the appropriate item if present in multiVersionMap - if val, ok := s.multiVersionMap[key]; ok { - val.Remove(index) + mvVal, found := s.multiVersionMap.Load(key) + // if the key doesn't exist in the overall map, return nil + if !found { + continue } + mvVal.(MultiVersionValue).Remove(index) } } - // unset the writesetKeys for this index - delete(s.txWritesetKeys, index) } // SetWriteset sets a writeset for a transaction index, and also writes all of the multiversion items in the writeset to the multiversion store. 
// TODO: returns a list of NEW keys added func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { - s.mtx.Lock() - defer s.mtx.Unlock() - + // TODO: add telemetry spans // remove old writeset if it exists s.removeOldWriteset(index, writeset) writeSetKeys := make([]string, 0, len(writeset)) for key, value := range writeset { writeSetKeys = append(writeSetKeys, key) - s.tryInitMultiVersionItem(key) + loadVal, _ := s.multiVersionMap.LoadOrStore(key, NewMultiVersionItem()) // init if necessary + mvVal := loadVal.(MultiVersionValue) if value == nil { // delete if nil value - s.multiVersionMap[key].Delete(index, incarnation) + // TODO: sync map + mvVal.Delete(index, incarnation) } else { - s.multiVersionMap[key].Set(index, incarnation, value) + mvVal.Set(index, incarnation, value) } } sort.Strings(writeSetKeys) // TODO: if we're sorting here anyways, maybe we just put it into a btree instead of a slice - s.txWritesetKeys[index] = writeSetKeys + s.txWritesetKeys.Store(index, writeSetKeys) } // InvalidateWriteset iterates over the keys for the given index and incarnation writeset and replaces with ESTIMATEs func (s *Store) InvalidateWriteset(index int, incarnation int) { - s.mtx.Lock() - defer s.mtx.Unlock() - - if keys, ok := s.txWritesetKeys[index]; ok { - for _, key := range keys { - // invalidate all of the writeset items - is this suboptimal? - we could potentially do concurrently if slow because locking is on an item specific level - s.tryInitMultiVersionItem(key) // this SHOULD no-op because we're invalidating existing keys - s.multiVersionMap[key].SetEstimate(index, incarnation) - } + keysAny, found := s.txWritesetKeys.Load(index) + if !found { + return + } + keys := keysAny.([]string) + for _, key := range keys { + // invalidate all of the writeset items - is this suboptimal? - we could potentially do concurrently if slow because locking is on an item specific level + val, _ := s.multiVersionMap.LoadOrStore(key, NewMultiVersionItem()) + val.(MultiVersionValue).SetEstimate(index, incarnation) } // we leave the writeset in place because we'll need it for key removal later if/when we replace with a new writeset } // SetEstimatedWriteset is used to directly write estimates instead of writing a writeset and later invalidating func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset WriteSet) { - s.mtx.Lock() - defer s.mtx.Unlock() - // remove old writeset if it exists s.removeOldWriteset(index, writeset) @@ -199,62 +185,71 @@ func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset WriteS // still need to save the writeset so we can remove the elements later: for key := range writeset { writeSetKeys = append(writeSetKeys, key) - s.tryInitMultiVersionItem(key) - s.multiVersionMap[key].SetEstimate(index, incarnation) + + mvVal, _ := s.multiVersionMap.LoadOrStore(key, NewMultiVersionItem()) // init if necessary + mvVal.(MultiVersionValue).SetEstimate(index, incarnation) } sort.Strings(writeSetKeys) - s.txWritesetKeys[index] = writeSetKeys + s.txWritesetKeys.Store(index, writeSetKeys) } // GetAllWritesetKeys implements MultiVersionStore. func (s *Store) GetAllWritesetKeys() map[int][]string { - s.mtx.RLock() - defer s.mtx.RUnlock() - return s.txWritesetKeys + writesetKeys := make(map[int][]string) + // TODO: is this safe? 
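	// Note on the TODO above (assumption about intended usage): sync.Map.Range is race-free but
	// only weakly consistent -- it does not block concurrent Store/Delete calls and may or may not
	// observe entries stored or deleted while the iteration runs. The snapshot returned here is
	// therefore only meaningful if callers invoke GetAllWritesetKeys after all writesets for the
	// block have been recorded.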
+ s.txWritesetKeys.Range(func(key, value interface{}) bool { + index := key.(int) + keys := value.([]string) + writesetKeys[index] = keys + return true + }) + + return writesetKeys } func (s *Store) SetReadset(index int, readset ReadSet) { - s.mtx.Lock() - defer s.mtx.Unlock() - - s.txReadSets[index] = readset + s.txReadSets.Store(index, readset) } func (s *Store) GetReadset(index int) ReadSet { - s.mtx.RLock() - defer s.mtx.RUnlock() - - return s.txReadSets[index] + readsetAny, found := s.txReadSets.Load(index) + if !found { + return nil + } + return readsetAny.(ReadSet) } func (s *Store) SetIterateset(index int, iterateset Iterateset) { - s.mtx.Lock() - defer s.mtx.Unlock() - - s.txIterateSets[index] = iterateset + s.txIterateSets.Store(index, iterateset) } func (s *Store) GetIterateset(index int) Iterateset { - s.mtx.RLock() - defer s.mtx.RUnlock() + iteratesetAny, found := s.txIterateSets.Load(index) + if !found { + return nil + } + return iteratesetAny.(Iterateset) +} - return s.txIterateSets[index] +func (s *Store) ClearReadset(index int) { + s.txReadSets.Delete(index) +} + +func (s *Store) ClearIterateset(index int) { + s.txReadSets.Delete(index) } // CollectIteratorItems implements MultiVersionStore. It will return a memDB containing all of the keys present in the multiversion store within the iteration range prior to (exclusive of) the index. func (s *Store) CollectIteratorItems(index int) *db.MemDB { - s.mtx.RLock() - defer s.mtx.RUnlock() - sortedItems := db.NewMemDB() // get all writeset keys prior to index - keys := s.txWritesetKeys for i := 0; i < index; i++ { - indexedWriteset, ok := keys[i] - if !ok { + writesetAny, found := s.txWritesetKeys.Load(i) + if !found { continue } + indexedWriteset := writesetAny.([]string) // TODO: do we want to exclude keys out of the range or just let the iterator handle it? for _, key := range indexedWriteset { // TODO: inefficient because (logn) for each key + rebalancing? maybe theres a better way to add to a tree to reduce rebalancing overhead @@ -278,6 +273,7 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { go func(iterationTracker iterationTracker, items *db.MemDB, returnChan chan bool, abortChan chan occtypes.Abort) { var parentIter types.Iterator expectedKeys := iterationTracker.iteratedKeys + foundKeys := 0 iter := s.newMVSValidationIterator(index, iterationTracker.startKey, iterationTracker.endKey, items, iterationTracker.ascending, iterationTracker.writeset, abortChan) if iterationTracker.ascending { parentIter = s.parentStore.Iterator(iterationTracker.startKey, iterationTracker.endKey) @@ -288,19 +284,21 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { mergeIterator := NewMVSMergeIterator(parentIter, iter, iterationTracker.ascending, NoOpHandler{}) defer mergeIterator.Close() for ; mergeIterator.Valid(); mergeIterator.Next() { - if len(expectedKeys) == 0 { + if (len(expectedKeys) - foundKeys) == 0 { // if we have no more expected keys, then the iterator is invalid returnChan <- false return } key := mergeIterator.Key() + // TODO: is this ok to not delete the key since we shouldnt have duplicate keys? 
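			// Note (assumption): counting matches in foundKeys is equivalent to deleting entries from
			// expectedKeys only if the merge iterator yields each key at most once. If duplicate keys
			// were possible, a local seen-set would be needed to avoid double-counting, e.g.:
			//
			//	seen := make(map[string]struct{})
			//	if _, dup := seen[string(key)]; !dup {
			//		seen[string(key)] = struct{}{}
			//		foundKeys++
			//	}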
if _, ok := expectedKeys[string(key)]; !ok { // if key isn't found returnChan <- false return } // remove from expected keys - delete(expectedKeys, string(key)) + foundKeys += 1 + // delete(expectedKeys, string(key)) // if our iterator key was the early stop, then we can break if bytes.Equal(key, iterationTracker.earlyStopKey) { @@ -308,7 +306,7 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { return } } - returnChan <- !(len(expectedKeys) > 0) + returnChan <- !((len(expectedKeys) - foundKeys) > 0) }(tracker, sortedItems, validChannel, abortChannel) select { case <-abortChannel: @@ -321,10 +319,11 @@ func (s *Store) validateIterator(index int, tracker iterationTracker) bool { func (s *Store) checkIteratorAtIndex(index int) bool { valid := true - s.mtx.RLock() - iterateset := s.txIterateSets[index] - s.mtx.RUnlock() - + iterateSetAny, found := s.txIterateSets.Load(index) + if !found { + return true + } + iterateset := iterateSetAny.(Iterateset) for _, iterationTracker := range iterateset { iteratorValid := s.validateIterator(index, iterationTracker) valid = valid && iteratorValid @@ -334,13 +333,13 @@ func (s *Store) checkIteratorAtIndex(index int) bool { func (s *Store) checkReadsetAtIndex(index int) (bool, []int) { conflictSet := make(map[int]struct{}) - - s.mtx.RLock() - readset := s.txReadSets[index] - s.mtx.RUnlock() - valid := true + readSetAny, found := s.txReadSets.Load(index) + if !found { + return true, []int{} + } + readset := readSetAny.(ReadSet) // iterate over readset and check if the value is the same as the latest value relateive to txIndex in the multiversion store for key, value := range readset { // get the latest value from the multiversion store @@ -379,7 +378,7 @@ func (s *Store) checkReadsetAtIndex(index int) (bool, []int) { // TODO: do we want to return bool + []int where bool indicates whether it was valid and then []int indicates only ones for which we need to wait due to estimates? - yes i think so? func (s *Store) ValidateTransactionState(index int) (bool, []int) { - defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") + // defer telemetry.MeasureSince(time.Now(), "store", "mvs", "validate") // TODO: can we parallelize for all iterators? 
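	// Sketch for the TODO above (assumption, not implemented here): the per-tracker validations in
	// checkIteratorAtIndex are independent, so they could run concurrently and be AND-ed together:
	//
	//	var valid int32 = 1
	//	var wg sync.WaitGroup
	//	for _, tracker := range iterateset {
	//		tracker := tracker
	//		wg.Add(1)
	//		go func() {
	//			defer wg.Done()
	//			if !s.validateIterator(index, tracker) {
	//				atomic.StoreInt32(&valid, 0)
	//			}
	//		}()
	//	}
	//	wg.Wait()
	//	return valid == 1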
iteratorValid := s.checkIteratorAtIndex(index) @@ -390,18 +389,20 @@ func (s *Store) ValidateTransactionState(index int) (bool, []int) { } func (s *Store) WriteLatestToStore() { - s.mtx.Lock() - defer s.mtx.Unlock() - // sort the keys - keys := make([]string, 0, len(s.multiVersionMap)) - for key := range s.multiVersionMap { - keys = append(keys, key) - } + keys := []string{} + s.multiVersionMap.Range(func(key, value interface{}) bool { + keys = append(keys, key.(string)) + return true + }) sort.Strings(keys) for _, key := range keys { - mvValue, found := s.multiVersionMap[key].GetLatestNonEstimate() + val, ok := s.multiVersionMap.Load(key) + if !ok { + continue + } + mvValue, found := val.(MultiVersionValue).GetLatestNonEstimate() if !found { // this means that at some point, there was an estimate, but we have since removed it so there isn't anything writeable at the key, so we can skip continue diff --git a/store/types/cache.go b/store/types/cache.go index 53f45d6b3..b00335a76 100644 --- a/store/types/cache.go +++ b/store/types/cache.go @@ -47,7 +47,7 @@ type BoundedCache struct { CacheBackend limit int - mu *sync.Mutex + mu *sync.Mutex metricName []string } @@ -88,7 +88,7 @@ func (c *BoundedCache) emitKeysEvictedMetrics(keysToEvict int) { func (c *BoundedCache) Set(key string, val *CValue) { c.mu.Lock() defer c.mu.Unlock() - defer c.emitCacheSizeMetric() + // defer c.emitCacheSizeMetric() if c.Len() >= c.limit { numEntries := c.Len() @@ -112,7 +112,7 @@ func (c *BoundedCache) Set(key string, val *CValue) { func (c *BoundedCache) Delete(key string) { c.mu.Lock() defer c.mu.Unlock() - defer c.emitCacheSizeMetric() + // defer c.emitCacheSizeMetric() c.CacheBackend.Delete(key) } @@ -120,7 +120,7 @@ func (c *BoundedCache) Delete(key string) { func (c *BoundedCache) DeleteAll() { c.mu.Lock() defer c.mu.Unlock() - defer c.emitCacheSizeMetric() + // defer c.emitCacheSizeMetric() c.CacheBackend.Range(func(key string, _ *CValue) bool { c.CacheBackend.Delete(key) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index c00e70dbe..7fe65ef07 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -91,6 +91,8 @@ func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx func (s *scheduler) invalidateTask(task *deliverTxTask) { for _, mv := range s.multiVersionStores { mv.InvalidateWriteset(task.Index, task.Incarnation) + mv.ClearReadset(task.Index) + mv.ClearIterateset(task.Index) } } From 92457bdc4b2d6877bcd5078ad289288cc73dcc58 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 27 Nov 2023 19:55:20 -0500 Subject: [PATCH 24/65] add async scheduler --- tasks/heap.go | 19 +++++ tasks/queue.go | 168 +++++++++++++++++++++++++++++++++++++++ tasks/queue_test.go | 121 ++++++++++++++++++++++++++++ tasks/scheduler.go | 41 +++++++++- tasks/scheduler_async.go | 159 ++++++++++++++++++++++++++++++++++++ tasks/scheduler_test.go | 4 +- 6 files changed, 508 insertions(+), 4 deletions(-) create mode 100644 tasks/heap.go create mode 100644 tasks/queue.go create mode 100644 tasks/queue_test.go create mode 100644 tasks/scheduler_async.go diff --git a/tasks/heap.go b/tasks/heap.go new file mode 100644 index 000000000..902615a91 --- /dev/null +++ b/tasks/heap.go @@ -0,0 +1,19 @@ +package tasks + +type taskHeap []int + +func (h taskHeap) Len() int { return len(h) } +func (h taskHeap) Less(i, j int) bool { return h[i] < h[j] } +func (h taskHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *taskHeap) Push(x interface{}) { + *h = append(*h, x.(int)) +} + +func (h *taskHeap) Pop() 
interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} diff --git a/tasks/queue.go b/tasks/queue.go new file mode 100644 index 000000000..15f18bc12 --- /dev/null +++ b/tasks/queue.go @@ -0,0 +1,168 @@ +package tasks + +import ( + "container/heap" + "sync" +) + +type TaskType int + +const ( + TypeIdle TaskType = iota + TypeExecution + TypeValidation +) + +type SchedulerQueue struct { + mx sync.Mutex + cond *sync.Cond + once sync.Once + + active sync.Map + tasks []*deliverTxTask + queue *taskHeap + workers int + closed bool +} + +func NewSchedulerQueue(tasks []*deliverTxTask, workers int) *SchedulerQueue { + sq := &SchedulerQueue{ + tasks: tasks, + queue: &taskHeap{}, + workers: workers, + } + sq.cond = sync.NewCond(&sq.mx) + + return sq +} + +func (sq *SchedulerQueue) Lock() { + sq.mx.Lock() +} + +func (sq *SchedulerQueue) Unlock() { + sq.mx.Unlock() +} + +func (sq *SchedulerQueue) SetToIdle(idx int) { + sq.Lock() + defer sq.Unlock() + sq.tasks[idx].Type = TypeIdle + sq.active.Delete(idx) +} + +func (sq *SchedulerQueue) ReExecute(idx int) { + sq.Lock() + defer sq.Unlock() + + TaskLog(sq.tasks[idx], "-> re-execute") + + sq.tasks[idx].ResetForExecution() + sq.pushTask(idx) +} + +// ReValidate is a helper method that revalidates a task +// without making it eligible for other workers to request it to validate +func (sq *SchedulerQueue) ReValidate(idx int) { + sq.Lock() + defer sq.Unlock() + + if sq.tasks[idx].Type != TypeValidation { + panic("trying to re-validate a task not in validation state") + } + + TaskLog(sq.tasks[idx], "-> re-validate") + sq.tasks[idx].Status = statusExecuted + sq.pushTask(idx) +} + +func (sq *SchedulerQueue) IsCompleted() bool { + sq.Lock() + defer sq.Unlock() + + if len(*sq.queue) == 0 { + for _, t := range sq.tasks { + if !t.IsValid() || t.Type != TypeIdle { + TaskLog(t, "not valid or not idle") + return false + } + } + return true + } + return false +} + +func (sq *SchedulerQueue) ValidateExecutedTask(idx int) { + sq.Lock() + defer sq.Unlock() + + if sq.tasks[idx].Type != TypeExecution { + TaskLog(sq.tasks[idx], "not in execution") + panic("trying to validate a task not in execution") + } + + TaskLog(sq.tasks[idx], "-> validate") + sq.tasks[idx].Type = TypeValidation + sq.pushTask(idx) +} + +func (sq *SchedulerQueue) AddValidationTask(idx int) { + sq.Lock() + defer sq.Unlock() + + // already active + if _, ok := sq.active.Load(idx); ok { + return + } + + TaskLog(sq.tasks[idx], "-> validate") + sq.tasks[idx].Status = statusExecuted + sq.tasks[idx].Type = TypeValidation + sq.pushTask(idx) +} + +func (sq *SchedulerQueue) pushTask(idx int) { + sq.active.Store(idx, struct{}{}) + heap.Push(sq.queue, idx) + sq.cond.Broadcast() +} + +func (sq *SchedulerQueue) AddExecutionTask(idx int) { + sq.Lock() + defer sq.Unlock() + + // already active + if _, ok := sq.active.Load(idx); ok { + return + } + + TaskLog(sq.tasks[idx], "-> execute") + + sq.tasks[idx].Type = TypeExecution + sq.pushTask(idx) +} + +func (sq *SchedulerQueue) NextTask() (*deliverTxTask, bool) { + sq.Lock() + defer sq.Unlock() + + for len(*sq.queue) == 0 && !sq.closed { + sq.cond.Wait() + } + + if sq.closed { + return nil, false + } + + idx := heap.Pop(sq.queue).(int) + return sq.tasks[idx], true +} + +func (sq *SchedulerQueue) Close() { + sq.once.Do(func() { + sq.Lock() + defer sq.Unlock() + sq.closed = true + sq.cond.Broadcast() + }) +} diff --git a/tasks/queue_test.go b/tasks/queue_test.go new file mode 100644 index 000000000..a6ecdb142 --- /dev/null +++ 
b/tasks/queue_test.go @@ -0,0 +1,121 @@ +package tasks + +import ( + "testing" +) + +func generateTasks(count int) []*deliverTxTask { + var res []*deliverTxTask + for i := 0; i < count; i++ { + res = append(res, &deliverTxTask{Index: i}) + } + return res +} + +func TestNewSchedulerQueue(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + if len(sq.tasks) != len(tasks) { + t.Errorf("Expected tasks length %d, but got %d", len(tasks), len(sq.tasks)) + } +} + +func TestAddValidationTask(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + sq.AddValidationTask(1) + + if sq.tasks[1].Type != TypeValidation { + t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[1].Type) + } +} + +func TestAddExecutionTask(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + sq.AddExecutionTask(1) + + if sq.tasks[1].Type != TypeExecution { + t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].Type) + } +} + +func TestSetToIdle(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + sq.AddExecutionTask(1) + sq.SetToIdle(1) + + if sq.tasks[1].Type != TypeIdle { + t.Errorf("Expected task type %d, but got %d", TypeIdle, sq.tasks[1].Type) + } +} + +func TestNextTask(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + sq.AddExecutionTask(1) + task, _ := sq.NextTask() + + if task != sq.tasks[1] { + t.Errorf("Expected task %v, but got %v", sq.tasks[1], task) + } +} + +func TestClose(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + sq.Close() + + if sq.closed != true { + t.Errorf("Expected closed to be true, but got %v", sq.closed) + } +} + +func TestNextTaskOrder(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + // Add tasks in non-sequential order + sq.AddExecutionTask(3) + sq.AddExecutionTask(1) + sq.AddExecutionTask(2) + sq.AddExecutionTask(4) + + // The task with the lowest index should be returned first + task, _ := sq.NextTask() + if task != sq.tasks[1] { + t.Errorf("Expected task %v, but got %v", sq.tasks[1], task) + } +} + +func TestAddValidationTaskWhenActive(t *testing.T) { + tasks := generateTasks(10) + sq := NewSchedulerQueue(tasks, 5) + + // Add task to execution queue + sq.AddExecutionTask(1) + // Try to add the same task to validation queue + sq.AddValidationTask(1) + + // Verify that the task's type is still TypeExecution + if sq.tasks[1].Type != TypeExecution { + t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].Type) + } + + // Add task to validation queue + sq.AddValidationTask(2) + // Try to add the same task to validation queue again + sq.AddValidationTask(2) + + // Verify that the task's type is still TypeValidation + if sq.tasks[2].Type != TypeValidation { + t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[2].Type) + } +} diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 7fe65ef07..93e971cff 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -33,6 +33,8 @@ const ( // statusValidated means the task has been validated // tasks in this status can be reset if an earlier task fails validation statusValidated status = "validated" + // statusInvalid means the task has been validated and is not valid + statusInvalid status = "invalid" // statusWaiting tasks are waiting for another tx to complete statusWaiting status = "waiting" ) @@ -41,6 +43,7 @@ type deliverTxTask struct { Ctx 
sdk.Context AbortCh chan occ.Abort + Type TaskType Status status Dependencies []int Abort *occ.Abort @@ -52,6 +55,18 @@ type deliverTxTask struct { ValidateCh chan struct{} } +func (dt *deliverTxTask) IsInvalid() bool { + return dt.Status == statusInvalid || dt.Status == statusAborted +} + +func (dt *deliverTxTask) IsValid() bool { + return dt.Status == statusValidated +} + +func (dt *deliverTxTask) IsWaiting() bool { + return dt.Status == statusWaiting +} + func (dt *deliverTxTask) Reset() { dt.Status = statusPending dt.Response = nil @@ -61,6 +76,18 @@ func (dt *deliverTxTask) Reset() { dt.VersionStores = nil } +func (dt *deliverTxTask) ResetForExecution() { + dt.Status = statusPending + dt.Type = TypeExecution + dt.Response = nil + dt.Abort = nil + dt.AbortCh = nil + dt.Dependencies = nil + dt.VersionStores = nil + dt.Incarnation++ + dt.ValidateCh = make(chan struct{}, 1) +} + func (dt *deliverTxTask) Increment() { dt.Incarnation++ dt.ValidateCh = make(chan struct{}, 1) @@ -68,6 +95,7 @@ func (dt *deliverTxTask) Increment() { // Scheduler processes tasks concurrently type Scheduler interface { + ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } @@ -178,7 +206,7 @@ func (s *scheduler) PrefillEstimates(ctx sdk.Context, reqs []*sdk.DeliverTxEntry } } -func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { +func (s *scheduler) ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { // initialize mutli-version stores if they haven't been initialized yet s.tryInitMultiVersionStore(ctx) // prefill estimates @@ -223,6 +251,7 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { // if the conflicts are now validated, then rerun this task if indexesValidated(s.allTasks, conflicts) { + task.Status = statusInvalid return true } else { // otherwise, wait for completion @@ -234,13 +263,21 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { // mark as validated, which will avoid re-validating unless a lower-index re-validates task.Status = statusValidated return false + } else { + TaskLog(task, fmt.Sprintf("conflicts: %d", len(conflicts))) + task.Status = statusInvalid + return true } // conflicts and valid, so it'll validate next time return false case statusWaiting: // if conflicts are done, then this task is ready to run again - return indexesValidated(s.allTasks, task.Dependencies) + if indexesValidated(s.allTasks, task.Dependencies) { + task.Status = statusPending + return true + } + return false } panic("unexpected status: " + task.Status) } diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go new file mode 100644 index 000000000..de85fbbee --- /dev/null +++ b/tasks/scheduler_async.go @@ -0,0 +1,159 @@ +package tasks + +import ( + "context" + "fmt" + "sync" + "sync/atomic" + "time" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/tendermint/tendermint/abci/types" +) + +// TODO: remove after we have a good sense this is working +func TaskLog(task *deliverTxTask, msg string) { + // helpful for debugging state transitions + //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) +} + +func waitWithMsg(msg string) context.CancelFunc { + goctx, cancel := context.WithCancel(context.Background()) + tick := time.NewTicker(1 * time.Second) + go func() { + for { + select { + case <-goctx.Done(): + return + 
case <-tick.C: + fmt.Println(msg) + } + } + }() + return cancel +} + +func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { + // initialize mutli-version stores if they haven't been initialized yet + s.tryInitMultiVersionStore(ctx) + // prefill estimates + s.PrefillEstimates(ctx, reqs) + tasks := toTasks(reqs) + s.allTasks = tasks + + workers := s.workers + if s.workers < 1 { + workers = len(tasks) + } + + // initialize scheduler queue + queue := NewSchedulerQueue(tasks, workers) + for _, t := range tasks { + queue.AddExecutionTask(t.Index) + } + + ch := make(chan int, len(tasks)) + active := atomic.Int32{} + wg := sync.WaitGroup{} + wg.Add(workers) + var final bool + + for i := 0; i < workers; i++ { + go func(worker int) { + defer wg.Done() + + for { + nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...(%d)", worker, active.Load())) + t, ok := queue.NextTask() + nt() + if !ok { + return + } + active.Add(1) + if t.Incarnation > 20 { + panic("too many incarnations") + } + if s.processTask(t, ctx, queue, tasks) { + ch <- t.Index + } else { + final = false + } + active.Add(-1) + } + + }(i) + } + + wg.Add(1) + go func() { + defer close(ch) + defer wg.Done() + defer queue.Close() + for { + select { + case <-ctx.Context().Done(): + return + case <-ch: + // if all tasks are completed AND there are no more tasks in the queue + if active.Load() == 0 && queue.IsCompleted() { + if final { + return + } + // try one more validation of everything + final = true + for i := 0; i < len(tasks); i++ { + queue.AddValidationTask(i) + } + } + } + } + }() + + wg.Wait() + + for _, mv := range s.multiVersionStores { + mv.WriteLatestToStore() + } + return collectResponses(tasks), nil +} + +func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *SchedulerQueue, tasks []*deliverTxTask) bool { + switch t.Type { + case TypeValidation: + TaskLog(t, "validate") + s.validateTask(ctx, t) + switch t.Status { + case statusValidated: + TaskLog(t, "VALIDATED") + queue.SetToIdle(t.Index) + return true + case statusWaiting: + queue.ReValidate(t.Index) + case statusInvalid: + queue.ReExecute(t.Index) + for i := t.Index + 1; i < len(tasks); i++ { + queue.AddValidationTask(i) + } + case statusAborted: + //if s.allTasks[t.Abort.DependentTxIdx].Status == statusValidated { + // queue.ReExecute(t.Abort.DependentTxIdx) + //} + queue.ReExecute(t.Index) + case statusPending: + queue.ReExecute(t.Index) + default: + TaskLog(t, "unexpected status") + panic("unexpected status ") + } + + case TypeExecution: + TaskLog(t, "execute") + + s.executeTask(ctx, t) + queue.ValidateExecutedTask(t.Index) + default: + TaskLog(t, "unexpected type") + panic("unexpected type ") + } + return false +} diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 9d24b54a8..fd45f106a 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -65,10 +65,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test every tx accesses same key", - workers: 50, + workers: 5, runs: 50, addStores: true, - requests: requestList(50), + requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) From 8f58b8c2363ff23af1a1953e0e3179a59891e23a Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 27 Nov 2023 20:22:02 -0500 Subject: [PATCH 25/65] small fixes --- tasks/queue.go | 1 + tasks/scheduler.go | 4 ---- 
tasks/scheduler_async.go | 22 +++++++++++----------- tasks/scheduler_test.go | 3 ++- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tasks/queue.go b/tasks/queue.go index 15f18bc12..043067db1 100644 --- a/tasks/queue.go +++ b/tasks/queue.go @@ -72,6 +72,7 @@ func (sq *SchedulerQueue) ReValidate(idx int) { } TaskLog(sq.tasks[idx], "-> re-validate") + sq.tasks[idx].Abort = nil sq.tasks[idx].Status = statusExecuted sq.pushTask(idx) } diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 93e971cff..7be2917bb 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -263,10 +263,6 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { // mark as validated, which will avoid re-validating unless a lower-index re-validates task.Status = statusValidated return false - } else { - TaskLog(task, fmt.Sprintf("conflicts: %d", len(conflicts))) - task.Status = statusInvalid - return true } // conflicts and valid, so it'll validate next time return false diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index de85fbbee..461cd0cb2 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -11,12 +11,14 @@ import ( "github.com/tendermint/tendermint/abci/types" ) -// TODO: remove after we have a good sense this is working +// TODO: remove after things work func TaskLog(task *deliverTxTask, msg string) { // helpful for debugging state transitions - //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) + fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) } +// TODO: remove after things work +// waitWithMsg prints a message every 1s, so we can tell what's hanging func waitWithMsg(msg string) context.CancelFunc { goctx, cancel := context.WithCancel(context.Background()) tick := time.NewTicker(1 * time.Second) @@ -70,9 +72,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return } active.Add(1) - if t.Incarnation > 20 { - panic("too many incarnations") - } if s.processTask(t, ctx, queue, tasks) { ch <- t.Index } else { @@ -122,24 +121,25 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu case TypeValidation: TaskLog(t, "validate") s.validateTask(ctx, t) + + // check the outcome of validation and do things accordingly switch t.Status { case statusValidated: + // task is possibly finished (can be re-validated by others) TaskLog(t, "VALIDATED") queue.SetToIdle(t.Index) return true - case statusWaiting: + case statusWaiting, statusExecuted: + // task should be re-validated (waiting on others) queue.ReValidate(t.Index) case statusInvalid: + // task should be re-executed along with all +1 tasks queue.ReExecute(t.Index) for i := t.Index + 1; i < len(tasks); i++ { queue.AddValidationTask(i) } case statusAborted: - //if s.allTasks[t.Abort.DependentTxIdx].Status == statusValidated { - // queue.ReExecute(t.Abort.DependentTxIdx) - //} - queue.ReExecute(t.Index) - case statusPending: + // task should be re-executed queue.ReExecute(t.Index) default: TaskLog(t, "unexpected status") diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index fd45f106a..b9ffe0aa1 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -65,7 +65,7 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test every tx accesses same key", - workers: 5, + workers: 50, runs: 50, addStores: true, requests: requestList(100), @@ -122,6 +122,7 @@ func TestProcessAll(t *testing.T) { t.Run(tt.name, func(t *testing.T) { for i := 0; i < tt.runs; i++ { // set a tracer 
provider + fmt.Println("**************************** RUN ", i, "****************************") tp := trace.NewNoopTracerProvider() otel.SetTracerProvider(trace.NewNoopTracerProvider()) tr := tp.Tracer("scheduler-test") From ed15067b4b0705cd7c64965748079cbf83f968f9 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 27 Nov 2023 20:33:35 -0500 Subject: [PATCH 26/65] remove waiting to avoid data race for now --- tasks/scheduler.go | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 7be2917bb..99b73de05 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -248,17 +248,8 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { case statusExecuted, statusValidated: if valid, conflicts := s.findConflicts(task); !valid { s.invalidateTask(task) - - // if the conflicts are now validated, then rerun this task - if indexesValidated(s.allTasks, conflicts) { - task.Status = statusInvalid - return true - } else { - // otherwise, wait for completion - task.Dependencies = conflicts - task.Status = statusWaiting - return false - } + task.Status = statusInvalid + return true } else if len(conflicts) == 0 { // mark as validated, which will avoid re-validating unless a lower-index re-validates task.Status = statusValidated @@ -266,14 +257,6 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { } // conflicts and valid, so it'll validate next time return false - - case statusWaiting: - // if conflicts are done, then this task is ready to run again - if indexesValidated(s.allTasks, task.Dependencies) { - task.Status = statusPending - return true - } - return false } panic("unexpected status: " + task.Status) } From 4119313c442d20ea91f1c68e1fa7107e6fd41864 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 27 Nov 2023 20:40:38 -0500 Subject: [PATCH 27/65] remove another datarace --- tasks/scheduler_async.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index 461cd0cb2..987335a81 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -58,7 +58,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t active := atomic.Int32{} wg := sync.WaitGroup{} wg.Add(workers) - var final bool + final := atomic.Bool{} for i := 0; i < workers; i++ { go func(worker int) { @@ -75,7 +75,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t if s.processTask(t, ctx, queue, tasks) { ch <- t.Index } else { - final = false + final.Store(false) } active.Add(-1) } @@ -95,11 +95,11 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t case <-ch: // if all tasks are completed AND there are no more tasks in the queue if active.Load() == 0 && queue.IsCompleted() { - if final { + if final.Load() { return } // try one more validation of everything - final = true + final.Store(true) for i := 0; i < len(tasks); i++ { queue.AddValidationTask(i) } From 48969c3e08da38001fe3b962891ac0c23970d731 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 28 Nov 2023 09:14:22 -0500 Subject: [PATCH 28/65] fix hang --- tasks/scheduler_async.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index 987335a81..b9e7c4e32 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -14,12 +14,12 @@ import ( // TODO: remove after things work func TaskLog(task 
*deliverTxTask, msg string) { // helpful for debugging state transitions - fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) + //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) } // TODO: remove after things work // waitWithMsg prints a message every 1s, so we can tell what's hanging -func waitWithMsg(msg string) context.CancelFunc { +func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { goctx, cancel := context.WithCancel(context.Background()) tick := time.NewTicker(1 * time.Second) go func() { @@ -29,6 +29,9 @@ func waitWithMsg(msg string) context.CancelFunc { return case <-tick.C: fmt.Println(msg) + for _, h := range handlers { + h() + } } } }() @@ -65,15 +68,20 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t defer wg.Done() for { - nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...(%d)", worker, active.Load())) + nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { + fmt.Println(fmt.Sprintf("worker=%d: active=%d", worker, active.Load())) + }) t, ok := queue.NextTask() nt() if !ok { return } active.Add(1) + if s.processTask(t, ctx, queue, tasks) { + active.Add(-1) ch <- t.Index + continue } else { final.Store(false) } From a7a9eaf40b11f9d7bd0cb2dd152dc0bb6499dc7d Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 28 Nov 2023 09:47:28 -0500 Subject: [PATCH 29/65] avoid status datarace --- tasks/queue.go | 4 +-- tasks/scheduler.go | 62 ++++++++++++++++++++++++---------------- tasks/scheduler_async.go | 4 +-- tasks/scheduler_test.go | 1 - 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/tasks/queue.go b/tasks/queue.go index 043067db1..41736af2a 100644 --- a/tasks/queue.go +++ b/tasks/queue.go @@ -73,7 +73,7 @@ func (sq *SchedulerQueue) ReValidate(idx int) { TaskLog(sq.tasks[idx], "-> re-validate") sq.tasks[idx].Abort = nil - sq.tasks[idx].Status = statusExecuted + sq.tasks[idx].SetStatus(statusExecuted) sq.pushTask(idx) } @@ -117,7 +117,7 @@ func (sq *SchedulerQueue) AddValidationTask(idx int) { } TaskLog(sq.tasks[idx], "-> validate") - sq.tasks[idx].Status = statusExecuted + sq.tasks[idx].SetStatus(statusExecuted) sq.tasks[idx].Type = TypeValidation sq.pushTask(idx) } diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 99b73de05..bb3f78f80 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -42,9 +42,10 @@ const ( type deliverTxTask struct { Ctx sdk.Context AbortCh chan occ.Abort + mx sync.RWMutex Type TaskType - Status status + status status Dependencies []int Abort *occ.Abort Index int @@ -55,20 +56,40 @@ type deliverTxTask struct { ValidateCh chan struct{} } +func (dt *deliverTxTask) SetStatus(s status) { + dt.mx.Lock() + defer dt.mx.Unlock() + dt.status = s +} + +func (dt *deliverTxTask) Status() status { + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.status +} + func (dt *deliverTxTask) IsInvalid() bool { - return dt.Status == statusInvalid || dt.Status == statusAborted + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.status == statusInvalid || dt.status == statusAborted } func (dt *deliverTxTask) IsValid() bool { - return dt.Status == statusValidated + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.status == statusValidated } func (dt *deliverTxTask) IsWaiting() bool { - return dt.Status == statusWaiting + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.status == statusWaiting } func (dt *deliverTxTask) Reset() { - dt.Status = statusPending + dt.mx.Lock() + defer dt.mx.Unlock() + dt.status = statusPending 
dt.Response = nil dt.Abort = nil dt.AbortCh = nil @@ -77,7 +98,9 @@ func (dt *deliverTxTask) Reset() { } func (dt *deliverTxTask) ResetForExecution() { - dt.Status = statusPending + dt.mx.Lock() + defer dt.mx.Unlock() + dt.status = statusPending dt.Type = TypeExecution dt.Response = nil dt.Abort = nil @@ -149,7 +172,7 @@ func toTasks(reqs []*sdk.DeliverTxEntry) []*deliverTxTask { res = append(res, &deliverTxTask{ Request: r.Request, Index: idx, - Status: statusPending, + status: statusPending, ValidateCh: make(chan struct{}, 1), }) } @@ -176,18 +199,9 @@ func (s *scheduler) tryInitMultiVersionStore(ctx sdk.Context) { s.multiVersionStores = mvs } -func indexesValidated(tasks []*deliverTxTask, idx []int) bool { - for _, i := range idx { - if tasks[i].Status != statusValidated { - return false - } - } - return true -} - func allValidated(tasks []*deliverTxTask) bool { for _, t := range tasks { - if t.Status != statusValidated { + if t.Status() != statusValidated { return false } } @@ -239,7 +253,7 @@ func (s *scheduler) ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) } func (s *scheduler) shouldRerun(task *deliverTxTask) bool { - switch task.Status { + switch task.Status() { case statusAborted, statusPending: return true @@ -248,17 +262,17 @@ func (s *scheduler) shouldRerun(task *deliverTxTask) bool { case statusExecuted, statusValidated: if valid, conflicts := s.findConflicts(task); !valid { s.invalidateTask(task) - task.Status = statusInvalid + task.SetStatus(statusInvalid) return true } else if len(conflicts) == 0 { // mark as validated, which will avoid re-validating unless a lower-index re-validates - task.Status = statusValidated + task.SetStatus(statusValidated) return false } // conflicts and valid, so it'll validate next time return false } - panic("unexpected status: " + task.Status) + panic("unexpected status: " + task.Status()) } func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { @@ -273,7 +287,7 @@ func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { func (s *scheduler) findFirstNonValidated() (int, bool) { for i, t := range s.allTasks { - if t.Status != statusValidated { + if t.Status() != statusValidated { return i, true } } @@ -438,7 +452,7 @@ func (s *scheduler) executeTask(ctx sdk.Context, task *deliverTxTask) { close(task.AbortCh) if abt, ok := <-task.AbortCh; ok { - task.Status = statusAborted + task.SetStatus(statusAborted) task.Abort = &abt return } @@ -448,6 +462,6 @@ func (s *scheduler) executeTask(ctx sdk.Context, task *deliverTxTask) { v.WriteToMultiVersionStore() } - task.Status = statusExecuted + task.SetStatus(statusExecuted) task.Response = &resp } diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index b9e7c4e32..280643c36 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -131,10 +131,10 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu s.validateTask(ctx, t) // check the outcome of validation and do things accordingly - switch t.Status { + switch t.Status() { case statusValidated: // task is possibly finished (can be re-validated by others) - TaskLog(t, "VALIDATED") + TaskLog(t, "VALIDATED (possibly finished)") queue.SetToIdle(t.Index) return true case statusWaiting, statusExecuted: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index b9ffe0aa1..bd0ac537d 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -122,7 +122,6 @@ func TestProcessAll(t *testing.T) { t.Run(tt.name, func(t *testing.T) { 
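			// each case is executed tt.runs times; repeated runs help surface nondeterministic
			// scheduling bugs (races, hangs) that a single pass can miss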
for i := 0; i < tt.runs; i++ { // set a tracer provider - fmt.Println("**************************** RUN ", i, "****************************") tp := trace.NewNoopTracerProvider() otel.SetTracerProvider(trace.NewNoopTracerProvider()) tr := tp.Tracer("scheduler-test") From 9e4a27955342a058193cd4b3d503e737713ffcea Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 28 Nov 2023 10:22:24 -0500 Subject: [PATCH 30/65] fix hang --- tasks/scheduler_async.go | 55 +++++++++++++++++----------------------- tasks/scheduler_test.go | 31 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index 280643c36..b61d58d94 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -57,17 +57,37 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t queue.AddExecutionTask(t.Index) } - ch := make(chan int, len(tasks)) active := atomic.Int32{} wg := sync.WaitGroup{} wg.Add(workers) final := atomic.Bool{} + finisher := sync.Once{} + mx := sync.Mutex{} for i := 0; i < workers; i++ { go func(worker int) { defer wg.Done() for { + + // check if all tasks are complete AND not running anything + mx.Lock() + if active.Load() == 0 && queue.IsCompleted() { + if final.Load() { + finisher.Do(func() { + queue.Close() + }) + } else { + // try one more validation of everything at end + final.Store(true) + for i := 0; i < len(tasks); i++ { + queue.AddValidationTask(i) + } + } + } + mx.Unlock() + + //TODO: remove once we feel good about this not hanging nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { fmt.Println(fmt.Sprintf("worker=%d: active=%d", worker, active.Load())) }) @@ -77,12 +97,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return } active.Add(1) - - if s.processTask(t, ctx, queue, tasks) { - active.Add(-1) - ch <- t.Index - continue - } else { + if !s.processTask(t, ctx, queue, tasks) { + // if anything doesn't validate successfully, we will need a final re-sweep final.Store(false) } active.Add(-1) @@ -91,31 +107,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t }(i) } - wg.Add(1) - go func() { - defer close(ch) - defer wg.Done() - defer queue.Close() - for { - select { - case <-ctx.Context().Done(): - return - case <-ch: - // if all tasks are completed AND there are no more tasks in the queue - if active.Load() == 0 && queue.IsCompleted() { - if final.Load() { - return - } - // try one more validation of everything - final.Store(true) - for i := 0; i < len(tasks); i++ { - queue.AddValidationTask(i) - } - } - } - } - }() - wg.Wait() for _, mv := range s.multiVersionStores { diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index bd0ac537d..a5be26154 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -63,6 +63,37 @@ func TestProcessAll(t *testing.T) { expectedErr error assertions func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) }{ + { + name: "Test no overlap txs", + workers: 50, + runs: 50, + addStores: true, + requests: requestList(100), + deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + // all txs read and write to the same key to maximize conflicts + kv := ctx.MultiStore().GetKVStore(testStoreKey) + + // write to the store with this tx's index + kv.Set(req.Tx, req.Tx) + val := string(kv.Get(req.Tx)) + + // return what was read from the store (final attempt should be index-1) + return 
types.ResponseDeliverTx{ + Info: val, + } + }, + assertions: func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) { + for idx, response := range res { + require.Equal(t, fmt.Sprintf("%d", idx), response.Info) + } + store := ctx.MultiStore().GetKVStore(testStoreKey) + for i := 0; i < len(res); i++ { + val := store.Get([]byte(fmt.Sprintf("%d", i))) + require.Equal(t, []byte(fmt.Sprintf("%d", i)), val) + } + }, + expectedErr: nil, + }, { name: "Test every tx accesses same key", workers: 50, From 014f4979685e56b81074808ed054d26c59c18e61 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 09:57:52 -0500 Subject: [PATCH 31/65] rebase sync version --- tasks/scheduler.go | 176 +++++++++++++++++++++++---------------- tasks/scheduler_async.go | 8 +- 2 files changed, 107 insertions(+), 77 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index bb3f78f80..c382b536e 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -1,6 +1,7 @@ package tasks import ( + "context" "crypto/sha256" "fmt" "sort" @@ -9,7 +10,6 @@ import ( "github.com/tendermint/tendermint/abci/types" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" - "golang.org/x/sync/errgroup" "github.com/cosmos/cosmos-sdk/store/multiversion" store "github.com/cosmos/cosmos-sdk/store/types" @@ -33,7 +33,7 @@ const ( // statusValidated means the task has been validated // tasks in this status can be reset if an earlier task fails validation statusValidated status = "validated" - // statusInvalid means the task has been validated and is not valid + // statusInvalid means the task has been invalidated statusInvalid status = "invalid" // statusWaiting tasks are waiting for another tx to complete statusWaiting status = "waiting" @@ -53,7 +53,7 @@ type deliverTxTask struct { Request types.RequestDeliverTx Response *types.ResponseDeliverTx VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore - ValidateCh chan struct{} + ValidateCh chan status } func (dt *deliverTxTask) SetStatus(s status) { @@ -107,19 +107,17 @@ func (dt *deliverTxTask) ResetForExecution() { dt.AbortCh = nil dt.Dependencies = nil dt.VersionStores = nil - dt.Incarnation++ - dt.ValidateCh = make(chan struct{}, 1) } func (dt *deliverTxTask) Increment() { dt.Incarnation++ - dt.ValidateCh = make(chan struct{}, 1) + dt.ValidateCh = make(chan status, 1) } // Scheduler processes tasks concurrently type Scheduler interface { - ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) + ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } type scheduler struct { @@ -128,6 +126,8 @@ type scheduler struct { multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore tracingInfo *tracing.Info allTasks []*deliverTxTask + executeCh chan func() + validateCh chan func() } // NewScheduler creates a new scheduler @@ -147,6 +147,29 @@ func (s *scheduler) invalidateTask(task *deliverTxTask) { } } +func start(ctx context.Context, ch chan func(), workers int) { + for i := 0; i < workers; i++ { + go func() { + for { + select { + case <-ctx.Done(): + return + case work := <-ch: + work() + } + } + }() + } +} + +func (s *scheduler) DoValidate(work func()) { + s.validateCh <- work +} + +func (s *scheduler) DoExecute(work func()) { + s.executeCh <- work +} + func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { var conflicts []int uniq := 
make(map[int]struct{}) @@ -166,14 +189,15 @@ func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { return valid, conflicts } -func toTasks(reqs []*sdk.DeliverTxEntry) []*deliverTxTask { +func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*deliverTxTask { res := make([]*deliverTxTask, 0, len(reqs)) for idx, r := range reqs { res = append(res, &deliverTxTask{ Request: r.Request, Index: idx, + Ctx: ctx, status: statusPending, - ValidateCh: make(chan struct{}, 1), + ValidateCh: make(chan status, 1), }) } return res @@ -208,7 +232,7 @@ func allValidated(tasks []*deliverTxTask) bool { return true } -func (s *scheduler) PrefillEstimates(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) { +func (s *scheduler) PrefillEstimates(reqs []*sdk.DeliverTxEntry) { // iterate over TXs, update estimated writesets where applicable for i, req := range reqs { mappedWritesets := req.EstimatedWritesets @@ -224,9 +248,27 @@ func (s *scheduler) ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) // initialize mutli-version stores if they haven't been initialized yet s.tryInitMultiVersionStore(ctx) // prefill estimates - s.PrefillEstimates(ctx, reqs) - tasks := toTasks(reqs) + s.PrefillEstimates(reqs) + tasks := toTasks(ctx, reqs) s.allTasks = tasks + s.executeCh = make(chan func(), len(tasks)) + s.validateCh = make(chan func(), len(tasks)) + + // default to number of tasks if workers is negative or 0 by this point + workers := s.workers + if s.workers < 1 { + workers = len(tasks) + } + + workerCtx, cancel := context.WithCancel(ctx.Context()) + defer cancel() + + // execution tasks are limited by workers + start(workerCtx, s.executeCh, workers) + + // validation tasks uses length of tasks to avoid blocking on validation + start(workerCtx, s.validateCh, len(tasks)) + toExecute := tasks for !allValidated(tasks) { var err error @@ -301,19 +343,26 @@ func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*del var mx sync.Mutex var res []*deliverTxTask + startIdx, anyLeft := s.findFirstNonValidated() + + if !anyLeft { + return nil, nil + } + wg := sync.WaitGroup{} - for i := 0; i < len(tasks); i++ { + for i := startIdx; i < len(tasks); i++ { + t := tasks[i] wg.Add(1) - go func(task *deliverTxTask) { + s.DoValidate(func() { defer wg.Done() - if !s.validateTask(ctx, task) { - task.Reset() - task.Increment() + if !s.validateTask(ctx, t) { + t.Reset() + t.Increment() mx.Lock() - res = append(res, task) + res = append(res, t) mx.Unlock() } - }(tasks[i]) + }) } wg.Wait() @@ -321,56 +370,47 @@ func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*del } // ExecuteAll executes all tasks concurrently -// Tasks are updated with their status -// TODO: error scenarios func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { ctx, span := s.traceSpan(ctx, "SchedulerExecuteAll", nil) defer span.End() - ch := make(chan *deliverTxTask, len(tasks)) - grp, gCtx := errgroup.WithContext(ctx.Context()) - - // a workers value < 1 means no limit - workers := s.workers - if s.workers < 1 { - workers = len(tasks) - } - // validationWg waits for all validations to complete // validations happen in separate goroutines in order to wait on previous index validationWg := &sync.WaitGroup{} validationWg.Add(len(tasks)) - grp.Go(func() error { - validationWg.Wait() - return nil - }) - for i := 0; i < workers; i++ { - grp.Go(func() error { - for { - select { - case <-gCtx.Done(): - return gCtx.Err() - case task, ok := <-ch: - if !ok { - return nil - } - 
s.prepareAndRunTask(validationWg, ctx, task) - } - } + for _, task := range tasks { + t := task + s.DoExecute(func() { + s.prepareAndRunTask(validationWg, ctx, t) }) } - for _, task := range tasks { - ch <- task - } - close(ch) + validationWg.Wait() - if err := grp.Wait(); err != nil { - return err + return nil +} + +func (s *scheduler) waitOnPreviousAndValidate(wg *sync.WaitGroup, task *deliverTxTask) { + defer wg.Done() + defer close(task.ValidateCh) + // wait on previous task to finish validation + // if a previous task fails validation, then subsequent should fail too (cascade) + if task.Index > 0 { + res, ok := <-s.allTasks[task.Index-1].ValidateCh + if ok && res != statusValidated { + task.Reset() + task.ValidateCh <- task.Status() + return + } + } + // if not validated, reset the task + if !s.validateTask(task.Ctx, task) { + task.Reset() } - return nil + // notify next task of this one's status + task.ValidateCh <- task.Status() } func (s *scheduler) prepareAndRunTask(wg *sync.WaitGroup, ctx sdk.Context, task *deliverTxTask) { @@ -378,19 +418,12 @@ func (s *scheduler) prepareAndRunTask(wg *sync.WaitGroup, ctx sdk.Context, task defer eSpan.End() task.Ctx = eCtx - s.executeTask(task.Ctx, task) - go func() { - defer wg.Done() - defer close(task.ValidateCh) - // wait on previous task to finish validation - if task.Index > 0 { - <-s.allTasks[task.Index-1].ValidateCh - } - if !s.validateTask(task.Ctx, task) { - task.Reset() - } - task.ValidateCh <- struct{}{} - }() + s.prepareTask(task) + s.executeTask(task) + + s.DoValidate(func() { + s.waitOnPreviousAndValidate(wg, task) + }) } func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) (sdk.Context, trace.Span) { @@ -405,8 +438,8 @@ func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) } // prepareTask initializes the context and version stores for a task -func (s *scheduler) prepareTask(ctx sdk.Context, task *deliverTxTask) { - ctx = ctx.WithTxIndex(task.Index) +func (s *scheduler) prepareTask(task *deliverTxTask) { + ctx := task.Ctx.WithTxIndex(task.Index) _, span := s.traceSpan(ctx, "SchedulerPrepare", task) defer span.End() @@ -439,10 +472,7 @@ func (s *scheduler) prepareTask(ctx sdk.Context, task *deliverTxTask) { } // executeTask executes a single task -func (s *scheduler) executeTask(ctx sdk.Context, task *deliverTxTask) { - - s.prepareTask(ctx, task) - +func (s *scheduler) executeTask(task *deliverTxTask) { dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerDeliverTx", task) defer dSpan.End() task.Ctx = dCtx diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index b61d58d94..f7487ae2b 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -42,8 +42,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // initialize mutli-version stores if they haven't been initialized yet s.tryInitMultiVersionStore(ctx) // prefill estimates - s.PrefillEstimates(ctx, reqs) - tasks := toTasks(reqs) + s.PrefillEstimates(reqs) + tasks := toTasks(ctx, reqs) s.allTasks = tasks workers := s.workers @@ -147,8 +147,8 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu case TypeExecution: TaskLog(t, "execute") - - s.executeTask(ctx, t) + s.prepareTask(t) + s.executeTask(t) queue.ValidateExecutedTask(t.Index) default: TaskLog(t, "unexpected type") From 01419694f72da97049787ca6605785c70aa1ed23 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 10:29:07 -0500 Subject: [PATCH 32/65] protect task type 
transition and reads --- tasks/queue.go | 23 +++++++++++++---------- tasks/queue_test.go | 20 ++++++++++---------- tasks/scheduler.go | 34 +++++++++++++++++++++++++++++++--- tasks/scheduler_async.go | 14 +++++++++----- 4 files changed, 63 insertions(+), 28 deletions(-) diff --git a/tasks/queue.go b/tasks/queue.go index 41736af2a..51c45872d 100644 --- a/tasks/queue.go +++ b/tasks/queue.go @@ -5,10 +5,10 @@ import ( "sync" ) -type TaskType int +type taskType int const ( - TypeIdle TaskType = iota + TypeIdle taskType = iota TypeExecution TypeValidation ) @@ -47,7 +47,7 @@ func (sq *SchedulerQueue) Unlock() { func (sq *SchedulerQueue) SetToIdle(idx int) { sq.Lock() defer sq.Unlock() - sq.tasks[idx].Type = TypeIdle + sq.tasks[idx].SetTaskType(TypeIdle) sq.active.Delete(idx) } @@ -67,7 +67,7 @@ func (sq *SchedulerQueue) ReValidate(idx int) { sq.Lock() defer sq.Unlock() - if sq.tasks[idx].Type != TypeValidation { + if sq.tasks[idx].TaskType() != TypeValidation { panic("trying to re-validate a task not in validation state") } @@ -83,7 +83,7 @@ func (sq *SchedulerQueue) IsCompleted() bool { if len(*sq.queue) == 0 { for _, t := range sq.tasks { - if !t.IsValid() || t.Type != TypeIdle { + if !t.IsValid() || !t.IsIdle() { TaskLog(t, "not valid or not idle") return false } @@ -93,20 +93,24 @@ func (sq *SchedulerQueue) IsCompleted() bool { return false } +// ValidateExecutedTask adds a task to the validation queue IFF it just executed +// this allows us to transition to validation without making it eligible for something else +// to add it to validation func (sq *SchedulerQueue) ValidateExecutedTask(idx int) { sq.Lock() defer sq.Unlock() - if sq.tasks[idx].Type != TypeExecution { + if !sq.tasks[idx].IsTaskType(TypeExecution) { TaskLog(sq.tasks[idx], "not in execution") panic("trying to validate a task not in execution") } TaskLog(sq.tasks[idx], "-> validate") - sq.tasks[idx].Type = TypeValidation + sq.tasks[idx].SetTaskType(TypeValidation) sq.pushTask(idx) } +// AddValidationTask adds a task to the validation queue IF NOT ALREADY in a queue func (sq *SchedulerQueue) AddValidationTask(idx int) { sq.Lock() defer sq.Unlock() @@ -118,7 +122,7 @@ func (sq *SchedulerQueue) AddValidationTask(idx int) { TaskLog(sq.tasks[idx], "-> validate") sq.tasks[idx].SetStatus(statusExecuted) - sq.tasks[idx].Type = TypeValidation + sq.tasks[idx].SetTaskType(TypeValidation) sq.pushTask(idx) } @@ -138,8 +142,7 @@ func (sq *SchedulerQueue) AddExecutionTask(idx int) { } TaskLog(sq.tasks[idx], "-> execute") - - sq.tasks[idx].Type = TypeExecution + sq.tasks[idx].SetTaskType(TypeExecution) sq.pushTask(idx) } diff --git a/tasks/queue_test.go b/tasks/queue_test.go index a6ecdb142..07ddf3003 100644 --- a/tasks/queue_test.go +++ b/tasks/queue_test.go @@ -27,8 +27,8 @@ func TestAddValidationTask(t *testing.T) { sq.AddValidationTask(1) - if sq.tasks[1].Type != TypeValidation { - t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[1].Type) + if !sq.tasks[1].IsTaskType(TypeValidation) { + t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[1].TaskType()) } } @@ -38,8 +38,8 @@ func TestAddExecutionTask(t *testing.T) { sq.AddExecutionTask(1) - if sq.tasks[1].Type != TypeExecution { - t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].Type) + if !sq.tasks[1].IsTaskType(TypeExecution) { + t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].TaskType()) } } @@ -50,8 +50,8 @@ func TestSetToIdle(t *testing.T) { sq.AddExecutionTask(1) sq.SetToIdle(1) - if 
sq.tasks[1].Type != TypeIdle { - t.Errorf("Expected task type %d, but got %d", TypeIdle, sq.tasks[1].Type) + if !sq.tasks[1].IsTaskType(TypeIdle) { + t.Errorf("Expected task type %d, but got %d", TypeIdle, sq.tasks[1].TaskType()) } } @@ -105,8 +105,8 @@ func TestAddValidationTaskWhenActive(t *testing.T) { sq.AddValidationTask(1) // Verify that the task's type is still TypeExecution - if sq.tasks[1].Type != TypeExecution { - t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].Type) + if !sq.tasks[1].IsTaskType(TypeExecution) { + t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].TaskType()) } // Add task to validation queue @@ -115,7 +115,7 @@ func TestAddValidationTaskWhenActive(t *testing.T) { sq.AddValidationTask(2) // Verify that the task's type is still TypeValidation - if sq.tasks[2].Type != TypeValidation { - t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[2].Type) + if !sq.tasks[2].IsTaskType(TypeValidation) { + t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[2].TaskType()) } } diff --git a/tasks/scheduler.go b/tasks/scheduler.go index c382b536e..ad730148a 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -44,7 +44,7 @@ type deliverTxTask struct { AbortCh chan occ.Abort mx sync.RWMutex - Type TaskType + taskType taskType status status Dependencies []int Abort *occ.Abort @@ -56,6 +56,34 @@ type deliverTxTask struct { ValidateCh chan status } +func (dt *deliverTxTask) SetTaskType(t taskType) { + dt.mx.Lock() + defer dt.mx.Unlock() + dt.taskType = t +} + +func (dt *deliverTxTask) IsIdle() bool { + return dt.IsTaskType(TypeIdle) +} + +func (dt *deliverTxTask) IsTaskType(t taskType) bool { + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.taskType == t +} + +func (dt *deliverTxTask) IsStatus(s status) bool { + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.status == s +} + +func (dt *deliverTxTask) TaskType() taskType { + dt.mx.RLock() + defer dt.mx.RUnlock() + return dt.taskType +} + func (dt *deliverTxTask) SetStatus(s status) { dt.mx.Lock() defer dt.mx.Unlock() @@ -101,7 +129,7 @@ func (dt *deliverTxTask) ResetForExecution() { dt.mx.Lock() defer dt.mx.Unlock() dt.status = statusPending - dt.Type = TypeExecution + dt.taskType = TypeExecution dt.Response = nil dt.Abort = nil dt.AbortCh = nil @@ -225,7 +253,7 @@ func (s *scheduler) tryInitMultiVersionStore(ctx sdk.Context) { func allValidated(tasks []*deliverTxTask) bool { for _, t := range tasks { - if t.Status() != statusValidated { + if !t.IsStatus(statusValidated) { return false } } diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go index f7487ae2b..1cda5b0e6 100644 --- a/tasks/scheduler_async.go +++ b/tasks/scheduler_async.go @@ -116,7 +116,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *SchedulerQueue, tasks []*deliverTxTask) bool { - switch t.Type { + switch t.TaskType() { case TypeValidation: TaskLog(t, "validate") s.validateTask(ctx, t) @@ -130,6 +130,7 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu return true case statusWaiting, statusExecuted: // task should be re-validated (waiting on others) + // how can we wait on dependencies? 
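// One possible answer to the question above, sketched here only as a hypothetical
// helper (it assumes the validation step records the conflicting tx indices in
// task.Dependencies, which this patch does not yet do): treat a waiting task as
// ready again only once every transaction it conflicted with has finished executing.
//
//	func (s *scheduler) dependenciesExecuted(task *deliverTxTask) bool {
//		for _, dep := range task.Dependencies {
//			d := s.allTasks[dep]
//			// a dependency counts as done once it has executed without aborting
//			// (statusValidated implies it executed as well)
//			if !d.IsStatus(statusExecuted) && !d.IsStatus(statusValidated) {
//				return false
//			}
//		}
//		return true
//	}
//
// The statusWaiting branch could then call queue.ReValidate only when
// dependenciesExecuted returns true, instead of re-queueing unconditionally.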
queue.ReValidate(t.Index) case statusInvalid: // task should be re-executed along with all +1 tasks @@ -137,9 +138,6 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu for i := t.Index + 1; i < len(tasks); i++ { queue.AddValidationTask(i) } - case statusAborted: - // task should be re-executed - queue.ReExecute(t.Index) default: TaskLog(t, "unexpected status") panic("unexpected status ") @@ -149,7 +147,13 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu TaskLog(t, "execute") s.prepareTask(t) s.executeTask(t) - queue.ValidateExecutedTask(t.Index) + + if t.Status() == statusAborted { + queue.ReExecute(t.Index) + } else { + queue.ValidateExecutedTask(t.Index) + } + default: TaskLog(t, "unexpected type") panic("unexpected type ") From ff52c478a2222b0fb1b79f1a57c0c41d6b4df6bd Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 14:03:43 -0500 Subject: [PATCH 33/65] refactor before fix --- tasks/heap.go | 19 - tasks/scheduler.go | 551 ++++---------------- tasks/scheduler_async.go | 162 ------ tasks/task.go | 143 +++++ tasks/task_execution.go | 67 +++ tasks/{queue.go => task_queue.go} | 43 +- tasks/{queue_test.go => task_queue_test.go} | 36 -- tasks/task_validation.go | 58 +++ tasks/utils.go | 95 ++++ 9 files changed, 487 insertions(+), 687 deletions(-) delete mode 100644 tasks/heap.go delete mode 100644 tasks/scheduler_async.go create mode 100644 tasks/task.go create mode 100644 tasks/task_execution.go rename tasks/{queue.go => task_queue.go} (79%) rename tasks/{queue_test.go => task_queue_test.go} (63%) create mode 100644 tasks/task_validation.go create mode 100644 tasks/utils.go diff --git a/tasks/heap.go b/tasks/heap.go deleted file mode 100644 index 902615a91..000000000 --- a/tasks/heap.go +++ /dev/null @@ -1,19 +0,0 @@ -package tasks - -type taskHeap []int - -func (h taskHeap) Len() int { return len(h) } -func (h taskHeap) Less(i, j int) bool { return h[i] < h[j] } -func (h taskHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } - -func (h *taskHeap) Push(x interface{}) { - *h = append(*h, x.(int)) -} - -func (h *taskHeap) Pop() interface{} { - old := *h - n := len(old) - x := old[n-1] - *h = old[0 : n-1] - return x -} diff --git a/tasks/scheduler.go b/tasks/scheduler.go index ad730148a..55fc0ceb3 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -1,151 +1,19 @@ package tasks import ( - "context" - "crypto/sha256" "fmt" - "sort" "sync" - - "github.com/tendermint/tendermint/abci/types" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" + "sync/atomic" "github.com/cosmos/cosmos-sdk/store/multiversion" - store "github.com/cosmos/cosmos-sdk/store/types" sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/cosmos/cosmos-sdk/types/occ" "github.com/cosmos/cosmos-sdk/utils/tracing" + "github.com/tendermint/tendermint/abci/types" ) -type status string - -const ( - // statusPending tasks are ready for execution - // all executing tasks are in pending state - statusPending status = "pending" - // statusExecuted tasks are ready for validation - // these tasks did not abort during execution - statusExecuted status = "executed" - // statusAborted means the task has been aborted - // these tasks transition to pending upon next execution - statusAborted status = "aborted" - // statusValidated means the task has been validated - // tasks in this status can be reset if an earlier task fails validation - statusValidated status = "validated" - // statusInvalid means the task has been invalidated 
- statusInvalid status = "invalid" - // statusWaiting tasks are waiting for another tx to complete - statusWaiting status = "waiting" -) - -type deliverTxTask struct { - Ctx sdk.Context - AbortCh chan occ.Abort - mx sync.RWMutex - - taskType taskType - status status - Dependencies []int - Abort *occ.Abort - Index int - Incarnation int - Request types.RequestDeliverTx - Response *types.ResponseDeliverTx - VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore - ValidateCh chan status -} - -func (dt *deliverTxTask) SetTaskType(t taskType) { - dt.mx.Lock() - defer dt.mx.Unlock() - dt.taskType = t -} - -func (dt *deliverTxTask) IsIdle() bool { - return dt.IsTaskType(TypeIdle) -} - -func (dt *deliverTxTask) IsTaskType(t taskType) bool { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.taskType == t -} - -func (dt *deliverTxTask) IsStatus(s status) bool { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.status == s -} - -func (dt *deliverTxTask) TaskType() taskType { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.taskType -} - -func (dt *deliverTxTask) SetStatus(s status) { - dt.mx.Lock() - defer dt.mx.Unlock() - dt.status = s -} - -func (dt *deliverTxTask) Status() status { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.status -} - -func (dt *deliverTxTask) IsInvalid() bool { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.status == statusInvalid || dt.status == statusAborted -} - -func (dt *deliverTxTask) IsValid() bool { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.status == statusValidated -} - -func (dt *deliverTxTask) IsWaiting() bool { - dt.mx.RLock() - defer dt.mx.RUnlock() - return dt.status == statusWaiting -} - -func (dt *deliverTxTask) Reset() { - dt.mx.Lock() - defer dt.mx.Unlock() - dt.status = statusPending - dt.Response = nil - dt.Abort = nil - dt.AbortCh = nil - dt.Dependencies = nil - dt.VersionStores = nil -} - -func (dt *deliverTxTask) ResetForExecution() { - dt.mx.Lock() - defer dt.mx.Unlock() - dt.status = statusPending - dt.taskType = TypeExecution - dt.Response = nil - dt.Abort = nil - dt.AbortCh = nil - dt.Dependencies = nil - dt.VersionStores = nil -} - -func (dt *deliverTxTask) Increment() { - dt.Incarnation++ - dt.ValidateCh = make(chan status, 1) -} - // Scheduler processes tasks concurrently type Scheduler interface { ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) - ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } type scheduler struct { @@ -167,359 +35,126 @@ func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx } } -func (s *scheduler) invalidateTask(task *deliverTxTask) { - for _, mv := range s.multiVersionStores { - mv.InvalidateWriteset(task.Index, task.Incarnation) - mv.ClearReadset(task.Index) - mv.ClearIterateset(task.Index) - } -} - -func start(ctx context.Context, ch chan func(), workers int) { - for i := 0; i < workers; i++ { - go func() { - for { - select { - case <-ctx.Done(): - return - case work := <-ch: - work() - } - } - }() - } -} - -func (s *scheduler) DoValidate(work func()) { - s.validateCh <- work -} - -func (s *scheduler) DoExecute(work func()) { - s.executeCh <- work -} - -func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { - var conflicts []int - uniq := make(map[int]struct{}) - valid := true - for _, mv := range s.multiVersionStores { - ok, mvConflicts := mv.ValidateTransactionState(task.Index) - for _, c := range mvConflicts { - if _, ok := uniq[c]; !ok { - conflicts 
= append(conflicts, c) - uniq[c] = struct{}{} - } - } - // any non-ok value makes valid false - valid = ok && valid - } - sort.Ints(conflicts) - return valid, conflicts -} - -func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*deliverTxTask { - res := make([]*deliverTxTask, 0, len(reqs)) - for idx, r := range reqs { - res = append(res, &deliverTxTask{ - Request: r.Request, - Index: idx, - Ctx: ctx, - status: statusPending, - ValidateCh: make(chan status, 1), - }) - } - return res -} - -func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { - res := make([]types.ResponseDeliverTx, 0, len(tasks)) - for _, t := range tasks { - res = append(res, *t.Response) - } - return res -} - -func (s *scheduler) tryInitMultiVersionStore(ctx sdk.Context) { - if s.multiVersionStores != nil { - return - } - mvs := make(map[sdk.StoreKey]multiversion.MultiVersionStore) - keys := ctx.MultiStore().StoreKeys() - for _, sk := range keys { - mvs[sk] = multiversion.NewMultiVersionStore(ctx.MultiStore().GetKVStore(sk)) - } - s.multiVersionStores = mvs -} - -func allValidated(tasks []*deliverTxTask) bool { - for _, t := range tasks { - if !t.IsStatus(statusValidated) { - return false - } - } - return true -} - -func (s *scheduler) PrefillEstimates(reqs []*sdk.DeliverTxEntry) { - // iterate over TXs, update estimated writesets where applicable - for i, req := range reqs { - mappedWritesets := req.EstimatedWritesets - // order shouldnt matter for storeKeys because each storeKey partitioned MVS is independent - for storeKey, writeset := range mappedWritesets { - // we use `-1` to indicate a prefill incarnation - s.multiVersionStores[storeKey].SetEstimatedWriteset(i, -1, writeset) - } - } -} - -func (s *scheduler) ProcessAllSync(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { - // initialize mutli-version stores if they haven't been initialized yet - s.tryInitMultiVersionStore(ctx) +func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { + // initialize mutli-version stores + s.initMultiVersionStore(ctx) // prefill estimates s.PrefillEstimates(reqs) tasks := toTasks(ctx, reqs) s.allTasks = tasks - s.executeCh = make(chan func(), len(tasks)) - s.validateCh = make(chan func(), len(tasks)) - // default to number of tasks if workers is negative or 0 by this point workers := s.workers if s.workers < 1 { workers = len(tasks) } - workerCtx, cancel := context.WithCancel(ctx.Context()) - defer cancel() + // initialize scheduler queue + queue := NewSchedulerQueue(tasks, workers) + for _, t := range tasks { + queue.AddExecutionTask(t.Index) + } + + active := atomic.Int32{} + wg := sync.WaitGroup{} + wg.Add(workers) + final := atomic.Bool{} + finisher := sync.Once{} + mx := sync.Mutex{} - // execution tasks are limited by workers - start(workerCtx, s.executeCh, workers) + for i := 0; i < workers; i++ { + go func(worker int) { + defer wg.Done() - // validation tasks uses length of tasks to avoid blocking on validation - start(workerCtx, s.validateCh, len(tasks)) + for { - toExecute := tasks - for !allValidated(tasks) { - var err error + // check if all tasks are complete AND not running anything + mx.Lock() + if active.Load() == 0 && queue.IsCompleted() { + if final.Load() { + finisher.Do(func() { + queue.Close() + }) + } else { + // try one more validation of everything at end + final.Store(true) + queue.ValidateTasksAfterIndex(-1) + } + } + mx.Unlock() - // execute sets statuses of tasks to either executed or 
aborted - if len(toExecute) > 0 { - err = s.executeAll(ctx, toExecute) - if err != nil { - return nil, err + //TODO: remove once we feel good about this not hanging + nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { + fmt.Println(fmt.Sprintf("worker=%d: active=%d", worker, active.Load())) + }) + t, ok := queue.NextTask() + nt() + if !ok { + return + } + active.Add(1) + if !s.processTask(t, ctx, queue, tasks) { + // if anything doesn't validate successfully, we will need a final re-sweep + final.Store(false) + } + active.Add(-1) } - } - // validate returns any that should be re-executed - // note this processes ALL tasks, not just those recently executed - toExecute, err = s.validateAll(ctx, tasks) - if err != nil { - return nil, err - } + }(i) } + + wg.Wait() + for _, mv := range s.multiVersionStores { mv.WriteLatestToStore() } return collectResponses(tasks), nil } -func (s *scheduler) shouldRerun(task *deliverTxTask) bool { - switch task.Status() { - - case statusAborted, statusPending: - return true +func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *SchedulerQueue, tasks []*deliverTxTask) bool { + switch t.TaskType() { + case TypeValidation, TypeIdle: + TaskLog(t, "validate") + s.validateTask(ctx, t) - // validated tasks can become unvalidated if an earlier re-run task now conflicts - case statusExecuted, statusValidated: - if valid, conflicts := s.findConflicts(task); !valid { - s.invalidateTask(task) - task.SetStatus(statusInvalid) + // check the outcome of validation and do things accordingly + switch t.Status() { + case statusValidated: + // task is possibly finished (can be re-validated by others) + TaskLog(t, "VALIDATED (possibly finished)") + queue.SetToIdle(t.Index) return true - } else if len(conflicts) == 0 { - // mark as validated, which will avoid re-validating unless a lower-index re-validates - task.SetStatus(statusValidated) - return false + case statusWaiting, statusExecuted: + // task should be re-validated (waiting on others) + // how can we wait on dependencies? 
+ queue.ReValidate(t.Index) + case statusInvalid: + // task should be re-executed along with all +1 tasks + queue.ReExecute(t.Index) + queue.ValidateTasksAfterIndex(t.Index) + default: + TaskLog(t, "unexpected status") + panic("unexpected status ") } - // conflicts and valid, so it'll validate next time - return false - } - panic("unexpected status: " + task.Status()) -} -func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { - _, span := s.traceSpan(ctx, "SchedulerValidate", task) - defer span.End() - - if s.shouldRerun(task) { - return false - } - return true -} - -func (s *scheduler) findFirstNonValidated() (int, bool) { - for i, t := range s.allTasks { - if t.Status() != statusValidated { - return i, true - } - } - return 0, false -} - -func (s *scheduler) validateAll(ctx sdk.Context, tasks []*deliverTxTask) ([]*deliverTxTask, error) { - ctx, span := s.traceSpan(ctx, "SchedulerValidateAll", nil) - defer span.End() - - var mx sync.Mutex - var res []*deliverTxTask - - startIdx, anyLeft := s.findFirstNonValidated() - - if !anyLeft { - return nil, nil - } - - wg := sync.WaitGroup{} - for i := startIdx; i < len(tasks); i++ { - t := tasks[i] - wg.Add(1) - s.DoValidate(func() { - defer wg.Done() - if !s.validateTask(ctx, t) { - t.Reset() - t.Increment() - mx.Lock() - res = append(res, t) - mx.Unlock() + case TypeExecution: + TaskLog(t, "execute") + t.LockTask() + s.prepareTask(t) + s.executeTask(t) + + if t.Status() == statusAborted { + queue.ReExecute(t.Index) + } else { + queue.ValidateExecutedTask(t.Index) + if t.Incarnation > 0 { + queue.ValidateTasksAfterIndex(t.Index) } - }) - } - wg.Wait() - - return res, nil -} - -// ExecuteAll executes all tasks concurrently -func (s *scheduler) executeAll(ctx sdk.Context, tasks []*deliverTxTask) error { - ctx, span := s.traceSpan(ctx, "SchedulerExecuteAll", nil) - defer span.End() - - // validationWg waits for all validations to complete - // validations happen in separate goroutines in order to wait on previous index - validationWg := &sync.WaitGroup{} - validationWg.Add(len(tasks)) - - for _, task := range tasks { - t := task - s.DoExecute(func() { - s.prepareAndRunTask(validationWg, ctx, t) - }) - } - - validationWg.Wait() - - return nil -} - -func (s *scheduler) waitOnPreviousAndValidate(wg *sync.WaitGroup, task *deliverTxTask) { - defer wg.Done() - defer close(task.ValidateCh) - // wait on previous task to finish validation - // if a previous task fails validation, then subsequent should fail too (cascade) - if task.Index > 0 { - res, ok := <-s.allTasks[task.Index-1].ValidateCh - if ok && res != statusValidated { - task.Reset() - task.ValidateCh <- task.Status() - return - } - } - // if not validated, reset the task - if !s.validateTask(task.Ctx, task) { - task.Reset() - } - - // notify next task of this one's status - task.ValidateCh <- task.Status() -} - -func (s *scheduler) prepareAndRunTask(wg *sync.WaitGroup, ctx sdk.Context, task *deliverTxTask) { - eCtx, eSpan := s.traceSpan(ctx, "SchedulerExecute", task) - defer eSpan.End() - task.Ctx = eCtx - - s.prepareTask(task) - s.executeTask(task) - - s.DoValidate(func() { - s.waitOnPreviousAndValidate(wg, task) - }) -} - -func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) (sdk.Context, trace.Span) { - spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) - if task != nil { - span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) - span.SetAttributes(attribute.Int("txIndex", 
task.Index)) - span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) - } - ctx = ctx.WithTraceSpanContext(spanCtx) - return ctx, span -} - -// prepareTask initializes the context and version stores for a task -func (s *scheduler) prepareTask(task *deliverTxTask) { - ctx := task.Ctx.WithTxIndex(task.Index) - - _, span := s.traceSpan(ctx, "SchedulerPrepare", task) - defer span.End() - - // initialize the context - abortCh := make(chan occ.Abort, len(s.multiVersionStores)) - - // if there are no stores, don't try to wrap, because there's nothing to wrap - if len(s.multiVersionStores) > 0 { - // non-blocking - cms := ctx.MultiStore().CacheMultiStore() - - // init version stores by store key - vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) - for storeKey, mvs := range s.multiVersionStores { - vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) } + t.UnlockTask() - // save off version store so we can ask it things later - task.VersionStores = vs - ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { - return vs[k] - }) - - ctx = ctx.WithMultiStore(ms) + default: + TaskLog(t, "unexpected type") + panic("unexpected type") } - - task.AbortCh = abortCh - task.Ctx = ctx -} - -// executeTask executes a single task -func (s *scheduler) executeTask(task *deliverTxTask) { - dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerDeliverTx", task) - defer dSpan.End() - task.Ctx = dCtx - - resp := s.deliverTx(task.Ctx, task.Request) - - close(task.AbortCh) - - if abt, ok := <-task.AbortCh; ok { - task.SetStatus(statusAborted) - task.Abort = &abt - return - } - - // write from version store to multiversion stores - for _, v := range task.VersionStores { - v.WriteToMultiVersionStore() - } - - task.SetStatus(statusExecuted) - task.Response = &resp + return false } diff --git a/tasks/scheduler_async.go b/tasks/scheduler_async.go deleted file mode 100644 index 1cda5b0e6..000000000 --- a/tasks/scheduler_async.go +++ /dev/null @@ -1,162 +0,0 @@ -package tasks - -import ( - "context" - "fmt" - "sync" - "sync/atomic" - "time" - - sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/tendermint/tendermint/abci/types" -) - -// TODO: remove after things work -func TaskLog(task *deliverTxTask, msg string) { - // helpful for debugging state transitions - //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) -} - -// TODO: remove after things work -// waitWithMsg prints a message every 1s, so we can tell what's hanging -func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { - goctx, cancel := context.WithCancel(context.Background()) - tick := time.NewTicker(1 * time.Second) - go func() { - for { - select { - case <-goctx.Done(): - return - case <-tick.C: - fmt.Println(msg) - for _, h := range handlers { - h() - } - } - } - }() - return cancel -} - -func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { - // initialize mutli-version stores if they haven't been initialized yet - s.tryInitMultiVersionStore(ctx) - // prefill estimates - s.PrefillEstimates(reqs) - tasks := toTasks(ctx, reqs) - s.allTasks = tasks - - workers := s.workers - if s.workers < 1 { - workers = len(tasks) - } - - // initialize scheduler queue - queue := NewSchedulerQueue(tasks, workers) - for _, t := range tasks { - queue.AddExecutionTask(t.Index) - } - - active := atomic.Int32{} - wg := sync.WaitGroup{} - wg.Add(workers) - final := atomic.Bool{} - finisher := sync.Once{} - mx 
:= sync.Mutex{} - - for i := 0; i < workers; i++ { - go func(worker int) { - defer wg.Done() - - for { - - // check if all tasks are complete AND not running anything - mx.Lock() - if active.Load() == 0 && queue.IsCompleted() { - if final.Load() { - finisher.Do(func() { - queue.Close() - }) - } else { - // try one more validation of everything at end - final.Store(true) - for i := 0; i < len(tasks); i++ { - queue.AddValidationTask(i) - } - } - } - mx.Unlock() - - //TODO: remove once we feel good about this not hanging - nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { - fmt.Println(fmt.Sprintf("worker=%d: active=%d", worker, active.Load())) - }) - t, ok := queue.NextTask() - nt() - if !ok { - return - } - active.Add(1) - if !s.processTask(t, ctx, queue, tasks) { - // if anything doesn't validate successfully, we will need a final re-sweep - final.Store(false) - } - active.Add(-1) - } - - }(i) - } - - wg.Wait() - - for _, mv := range s.multiVersionStores { - mv.WriteLatestToStore() - } - return collectResponses(tasks), nil -} - -func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *SchedulerQueue, tasks []*deliverTxTask) bool { - switch t.TaskType() { - case TypeValidation: - TaskLog(t, "validate") - s.validateTask(ctx, t) - - // check the outcome of validation and do things accordingly - switch t.Status() { - case statusValidated: - // task is possibly finished (can be re-validated by others) - TaskLog(t, "VALIDATED (possibly finished)") - queue.SetToIdle(t.Index) - return true - case statusWaiting, statusExecuted: - // task should be re-validated (waiting on others) - // how can we wait on dependencies? - queue.ReValidate(t.Index) - case statusInvalid: - // task should be re-executed along with all +1 tasks - queue.ReExecute(t.Index) - for i := t.Index + 1; i < len(tasks); i++ { - queue.AddValidationTask(i) - } - default: - TaskLog(t, "unexpected status") - panic("unexpected status ") - } - - case TypeExecution: - TaskLog(t, "execute") - s.prepareTask(t) - s.executeTask(t) - - if t.Status() == statusAborted { - queue.ReExecute(t.Index) - } else { - queue.ValidateExecutedTask(t.Index) - } - - default: - TaskLog(t, "unexpected type") - panic("unexpected type ") - } - return false -} diff --git a/tasks/task.go b/tasks/task.go new file mode 100644 index 000000000..e08f5b9c9 --- /dev/null +++ b/tasks/task.go @@ -0,0 +1,143 @@ +package tasks + +import ( + "sync" + + "github.com/cosmos/cosmos-sdk/store/multiversion" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/cosmos/cosmos-sdk/types/occ" + "github.com/tendermint/tendermint/abci/types" +) + +type status string + +const ( + // statusPending tasks are ready for execution + // all executing tasks are in pending state + statusPending status = "pending" + // statusExecuted tasks are ready for validation + // these tasks did not abort during execution + statusExecuted status = "executed" + // statusAborted means the task has been aborted + // these tasks transition to pending upon next execution + statusAborted status = "aborted" + // statusValidated means the task has been validated + // tasks in this status can be reset if an earlier task fails validation + statusValidated status = "validated" + // statusInvalid means the task has been invalidated + statusInvalid status = "invalid" + // statusWaiting tasks are waiting for another tx to complete + statusWaiting status = "waiting" +) + +type deliverTxTask struct { + Ctx sdk.Context + AbortCh chan occ.Abort + rwMx sync.RWMutex + mx sync.Mutex + 
+ taskType taskType + status status + Dependencies []int + Abort *occ.Abort + Index int + Incarnation int + Request types.RequestDeliverTx + Response *types.ResponseDeliverTx + VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore + ValidateCh chan status +} + +func (dt *deliverTxTask) LockTask() { + dt.mx.Lock() +} + +func (dt *deliverTxTask) UnlockTask() { + dt.mx.Unlock() +} + +func (dt *deliverTxTask) SetTaskType(t taskType) { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + dt.taskType = t +} + +func (dt *deliverTxTask) IsIdle() bool { + return dt.IsTaskType(TypeIdle) +} + +func (dt *deliverTxTask) IsTaskType(t taskType) bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.taskType == t +} + +func (dt *deliverTxTask) IsStatus(s status) bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.status == s +} + +func (dt *deliverTxTask) TaskType() taskType { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.taskType +} + +func (dt *deliverTxTask) SetStatus(s status) { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + dt.status = s +} + +func (dt *deliverTxTask) Status() status { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.status +} + +func (dt *deliverTxTask) IsInvalid() bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.status == statusInvalid || dt.status == statusAborted +} + +func (dt *deliverTxTask) IsValid() bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.status == statusValidated +} + +func (dt *deliverTxTask) IsWaiting() bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.status == statusWaiting +} + +func (dt *deliverTxTask) Reset() { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + dt.status = statusPending + dt.Response = nil + dt.Abort = nil + dt.AbortCh = nil + dt.Dependencies = nil + dt.VersionStores = nil +} + +func (dt *deliverTxTask) ResetForExecution() { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + dt.status = statusPending + dt.taskType = TypeExecution + dt.Response = nil + dt.Abort = nil + dt.AbortCh = nil + dt.Dependencies = nil + dt.VersionStores = nil +} + +func (dt *deliverTxTask) Increment() { + dt.Incarnation++ + dt.ValidateCh = make(chan status, 1) +} diff --git a/tasks/task_execution.go b/tasks/task_execution.go new file mode 100644 index 000000000..04da29711 --- /dev/null +++ b/tasks/task_execution.go @@ -0,0 +1,67 @@ +package tasks + +import ( + "github.com/cosmos/cosmos-sdk/store/multiversion" + store "github.com/cosmos/cosmos-sdk/store/types" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/cosmos/cosmos-sdk/types/occ" +) + +// prepareTask initializes the context and version stores for a task +func (s *scheduler) prepareTask(task *deliverTxTask) { + ctx := task.Ctx.WithTxIndex(task.Index) + + _, span := s.traceSpan(ctx, "SchedulerPrepare", task) + defer span.End() + + // initialize the context + abortCh := make(chan occ.Abort, len(s.multiVersionStores)) + + // if there are no stores, don't try to wrap, because there's nothing to wrap + if len(s.multiVersionStores) > 0 { + // non-blocking + cms := ctx.MultiStore().CacheMultiStore() + + // init version stores by store key + vs := make(map[store.StoreKey]*multiversion.VersionIndexedStore) + for storeKey, mvs := range s.multiVersionStores { + vs[storeKey] = mvs.VersionedIndexedStore(task.Index, task.Incarnation, abortCh) + } + + // save off version store so we can ask it things later + task.VersionStores = vs + ms := cms.SetKVStores(func(k store.StoreKey, kvs sdk.KVStore) store.CacheWrap { + return vs[k] + }) + + ctx = 
ctx.WithMultiStore(ms) + } + + task.AbortCh = abortCh + task.Ctx = ctx +} + +// executeTask executes a single task +func (s *scheduler) executeTask(task *deliverTxTask) { + dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerDeliverTx", task) + defer dSpan.End() + task.Ctx = dCtx + + resp := s.deliverTx(task.Ctx, task.Request) + + close(task.AbortCh) + + if abt, ok := <-task.AbortCh; ok { + task.SetStatus(statusAborted) + task.Abort = &abt + return + } + + // write from version store to multiversion stores + for _, v := range task.VersionStores { + v.WriteToMultiVersionStore() + } + + task.SetStatus(statusExecuted) + task.Response = &resp +} diff --git a/tasks/queue.go b/tasks/task_queue.go similarity index 79% rename from tasks/queue.go rename to tasks/task_queue.go index 51c45872d..464ed4c64 100644 --- a/tasks/queue.go +++ b/tasks/task_queue.go @@ -67,13 +67,12 @@ func (sq *SchedulerQueue) ReValidate(idx int) { sq.Lock() defer sq.Unlock() - if sq.tasks[idx].TaskType() != TypeValidation { + if !sq.tasks[idx].IsTaskType(TypeValidation) { panic("trying to re-validate a task not in validation state") } TaskLog(sq.tasks[idx], "-> re-validate") sq.tasks[idx].Abort = nil - sq.tasks[idx].SetStatus(statusExecuted) sq.pushTask(idx) } @@ -110,20 +109,22 @@ func (sq *SchedulerQueue) ValidateExecutedTask(idx int) { sq.pushTask(idx) } -// AddValidationTask adds a task to the validation queue IF NOT ALREADY in a queue -func (sq *SchedulerQueue) AddValidationTask(idx int) { +func (sq *SchedulerQueue) ValidateTasksAfterIndex(afterIdx int) { sq.Lock() defer sq.Unlock() - // already active - if _, ok := sq.active.Load(idx); ok { - return - } + for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { + // already active - TaskLog(sq.tasks[idx], "-> validate") - sq.tasks[idx].SetStatus(statusExecuted) - sq.tasks[idx].SetTaskType(TypeValidation) - sq.pushTask(idx) + if _, ok := sq.active.Load(idx); ok { + continue + } + + TaskLog(sq.tasks[idx], "-> validate") + sq.tasks[idx].SetStatus(statusExecuted) + sq.tasks[idx].SetTaskType(TypeValidation) + sq.pushTask(idx) + } } func (sq *SchedulerQueue) pushTask(idx int) { @@ -170,3 +171,21 @@ func (sq *SchedulerQueue) Close() { sq.cond.Broadcast() }) } + +type taskHeap []int + +func (h taskHeap) Len() int { return len(h) } +func (h taskHeap) Less(i, j int) bool { return h[i] < h[j] } +func (h taskHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *taskHeap) Push(x interface{}) { + *h = append(*h, x.(int)) +} + +func (h *taskHeap) Pop() interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} diff --git a/tasks/queue_test.go b/tasks/task_queue_test.go similarity index 63% rename from tasks/queue_test.go rename to tasks/task_queue_test.go index 07ddf3003..9e7727d1e 100644 --- a/tasks/queue_test.go +++ b/tasks/task_queue_test.go @@ -21,17 +21,6 @@ func TestNewSchedulerQueue(t *testing.T) { } } -func TestAddValidationTask(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) - - sq.AddValidationTask(1) - - if !sq.tasks[1].IsTaskType(TypeValidation) { - t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[1].TaskType()) - } -} - func TestAddExecutionTask(t *testing.T) { tasks := generateTasks(10) sq := NewSchedulerQueue(tasks, 5) @@ -94,28 +83,3 @@ func TestNextTaskOrder(t *testing.T) { t.Errorf("Expected task %v, but got %v", sq.tasks[1], task) } } - -func TestAddValidationTaskWhenActive(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) - - // Add task to 
execution queue - sq.AddExecutionTask(1) - // Try to add the same task to validation queue - sq.AddValidationTask(1) - - // Verify that the task's type is still TypeExecution - if !sq.tasks[1].IsTaskType(TypeExecution) { - t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].TaskType()) - } - - // Add task to validation queue - sq.AddValidationTask(2) - // Try to add the same task to validation queue again - sq.AddValidationTask(2) - - // Verify that the task's type is still TypeValidation - if !sq.tasks[2].IsTaskType(TypeValidation) { - t.Errorf("Expected task type %d, but got %d", TypeValidation, sq.tasks[2].TaskType()) - } -} diff --git a/tasks/task_validation.go b/tasks/task_validation.go new file mode 100644 index 000000000..5e1f66769 --- /dev/null +++ b/tasks/task_validation.go @@ -0,0 +1,58 @@ +package tasks + +import ( + sdk "github.com/cosmos/cosmos-sdk/types" + "sort" +) + +func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { + var conflicts []int + uniq := make(map[int]struct{}) + valid := true + for _, mv := range s.multiVersionStores { + ok, mvConflicts := mv.ValidateTransactionState(task.Index) + for _, c := range mvConflicts { + if _, ok := uniq[c]; !ok { + conflicts = append(conflicts, c) + uniq[c] = struct{}{} + } + } + // any non-ok value makes valid false + valid = ok && valid + } + sort.Ints(conflicts) + return valid, conflicts +} + +func (s *scheduler) invalidateTask(task *deliverTxTask) { + for _, mv := range s.multiVersionStores { + mv.InvalidateWriteset(task.Index, task.Incarnation) + mv.ClearReadset(task.Index) + mv.ClearIterateset(task.Index) + } +} + +func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { + // avoids validation races WITHIN a task + task.LockTask() + defer task.UnlockTask() + + _, span := s.traceSpan(ctx, "SchedulerValidate", task) + defer span.End() + + if valid, conflicts := s.findConflicts(task); !valid { + s.invalidateTask(task) + task.SetStatus(statusInvalid) + if len(conflicts) > 0 { + task.Dependencies = conflicts + } + return false + } else if len(conflicts) == 0 { + // mark as validated, which will avoid re-validating unless a lower-index re-validates + task.SetStatus(statusValidated) + return true + } else { + task.Dependencies = conflicts + } + return false +} diff --git a/tasks/utils.go b/tasks/utils.go new file mode 100644 index 000000000..31f891a0e --- /dev/null +++ b/tasks/utils.go @@ -0,0 +1,95 @@ +package tasks + +import ( + "context" + "crypto/sha256" + "fmt" + "github.com/cosmos/cosmos-sdk/store/multiversion" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/tendermint/tendermint/abci/types" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + + "time" +) + +// TODO: remove after things work +func TaskLog(task *deliverTxTask, msg string) { + // helpful for debugging state transitions + //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) +} + +// TODO: remove after things work +// waitWithMsg prints a message every 1s, so we can tell what's hanging +func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { + goctx, cancel := context.WithCancel(context.Background()) + tick := time.NewTicker(1 * time.Second) + go func() { + for { + select { + case <-goctx.Done(): + return + case <-tick.C: + fmt.Println(msg) + for _, h := range handlers { + h() + } + } + } + }() + return cancel +} + +func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) (sdk.Context, trace.Span) { + spanCtx, 
span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) + if task != nil { + span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) + span.SetAttributes(attribute.Int("txIndex", task.Index)) + span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) + } + ctx = ctx.WithTraceSpanContext(spanCtx) + return ctx, span +} + +func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*deliverTxTask { + res := make([]*deliverTxTask, 0, len(reqs)) + for idx, r := range reqs { + res = append(res, &deliverTxTask{ + Request: r.Request, + Index: idx, + Ctx: ctx, + status: statusPending, + ValidateCh: make(chan status, 1), + }) + } + return res +} + +func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { + res := make([]types.ResponseDeliverTx, 0, len(tasks)) + for _, t := range tasks { + res = append(res, *t.Response) + } + return res +} + +func (s *scheduler) initMultiVersionStore(ctx sdk.Context) { + mvs := make(map[sdk.StoreKey]multiversion.MultiVersionStore) + keys := ctx.MultiStore().StoreKeys() + for _, sk := range keys { + mvs[sk] = multiversion.NewMultiVersionStore(ctx.MultiStore().GetKVStore(sk)) + } + s.multiVersionStores = mvs +} + +func (s *scheduler) PrefillEstimates(reqs []*sdk.DeliverTxEntry) { + // iterate over TXs, update estimated writesets where applicable + for i, req := range reqs { + mappedWritesets := req.EstimatedWritesets + // order shouldnt matter for storeKeys because each storeKey partitioned MVS is independent + for storeKey, writeset := range mappedWritesets { + // we use `-1` to indicate a prefill incarnation + s.multiVersionStores[storeKey].SetEstimatedWriteset(i, -1, writeset) + } + } +} From 522ed03ec51576c9e52d5380e430c4990996d75a Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 15:10:06 -0500 Subject: [PATCH 34/65] cleanup --- tasks/scheduler.go | 4 +--- tasks/scheduler_test.go | 6 +++--- tasks/task_queue.go | 27 ++++++++++++--------------- tasks/task_queue_test.go | 22 +++++----------------- 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 55fc0ceb3..1526a44b5 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -50,9 +50,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // initialize scheduler queue queue := NewSchedulerQueue(tasks, workers) - for _, t := range tasks { - queue.AddExecutionTask(t.Index) - } + queue.AddAllTasksToExecutionQueue() active := atomic.Int32{} wg := sync.WaitGroup{} diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index a5be26154..08a89ccd9 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -65,10 +65,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 50, - runs: 50, + workers: 5, + runs: 1, addStores: true, - requests: requestList(100), + requests: requestList(5), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 464ed4c64..de834cc78 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -18,11 +18,12 @@ type SchedulerQueue struct { cond *sync.Cond once sync.Once - active sync.Map - tasks []*deliverTxTask - queue *taskHeap - workers int - closed bool + executing sync.Map + active sync.Map + tasks []*deliverTxTask + queue *taskHeap + workers int + 
closed bool } func NewSchedulerQueue(tasks []*deliverTxTask, workers int) *SchedulerQueue { @@ -99,11 +100,10 @@ func (sq *SchedulerQueue) ValidateExecutedTask(idx int) { sq.Lock() defer sq.Unlock() - if !sq.tasks[idx].IsTaskType(TypeExecution) { + if _, ok := sq.active.Load(idx); !ok { TaskLog(sq.tasks[idx], "not in execution") panic("trying to validate a task not in execution") } - TaskLog(sq.tasks[idx], "-> validate") sq.tasks[idx].SetTaskType(TypeValidation) sq.pushTask(idx) @@ -133,18 +133,15 @@ func (sq *SchedulerQueue) pushTask(idx int) { sq.cond.Broadcast() } -func (sq *SchedulerQueue) AddExecutionTask(idx int) { +func (sq *SchedulerQueue) AddAllTasksToExecutionQueue() { sq.Lock() defer sq.Unlock() - // already active - if _, ok := sq.active.Load(idx); ok { - return + for idx := range sq.tasks { + TaskLog(sq.tasks[idx], "-> execute") + sq.tasks[idx].SetTaskType(TypeExecution) + sq.pushTask(idx) } - - TaskLog(sq.tasks[idx], "-> execute") - sq.tasks[idx].SetTaskType(TypeExecution) - sq.pushTask(idx) } func (sq *SchedulerQueue) NextTask() (*deliverTxTask, bool) { diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index 9e7727d1e..572633067 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -21,22 +21,11 @@ func TestNewSchedulerQueue(t *testing.T) { } } -func TestAddExecutionTask(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) - - sq.AddExecutionTask(1) - - if !sq.tasks[1].IsTaskType(TypeExecution) { - t.Errorf("Expected task type %d, but got %d", TypeExecution, sq.tasks[1].TaskType()) - } -} - func TestSetToIdle(t *testing.T) { tasks := generateTasks(10) sq := NewSchedulerQueue(tasks, 5) - sq.AddExecutionTask(1) + sq.AddAllTasksToExecutionQueue() sq.SetToIdle(1) if !sq.tasks[1].IsTaskType(TypeIdle) { @@ -48,7 +37,7 @@ func TestNextTask(t *testing.T) { tasks := generateTasks(10) sq := NewSchedulerQueue(tasks, 5) - sq.AddExecutionTask(1) + sq.AddAllTasksToExecutionQueue() task, _ := sq.NextTask() if task != sq.tasks[1] { @@ -72,10 +61,9 @@ func TestNextTaskOrder(t *testing.T) { sq := NewSchedulerQueue(tasks, 5) // Add tasks in non-sequential order - sq.AddExecutionTask(3) - sq.AddExecutionTask(1) - sq.AddExecutionTask(2) - sq.AddExecutionTask(4) + sq.AddAllTasksToExecutionQueue() + tsk, _ := sq.NextTask() + sq.ReExecute(tsk.Index) // The task with the lowest index should be returned first task, _ := sq.NextTask() From 23dfade7d57e1485313f500cb2459fcb6fc2afa2 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 23:28:59 -0500 Subject: [PATCH 35/65] save off working version before refactor --- tasks/scheduler.go | 77 +++++++---- tasks/scheduler_test.go | 66 ++++++++- tasks/task.go | 88 ++++++------ tasks/task_execution.go | 8 +- tasks/task_queue.go | 290 +++++++++++++++++++++++++++------------ tasks/task_queue_test.go | 134 +++++++++++------- tasks/task_validation.go | 37 ++--- tasks/utils.go | 23 ++-- 8 files changed, 482 insertions(+), 241 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 1526a44b5..b7a04c836 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -16,12 +16,14 @@ type Scheduler interface { ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } +var aborts = atomic.Int32{} + type scheduler struct { deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) workers int multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore tracingInfo *tracing.Info - allTasks []*deliverTxTask + 
allTasks []*TxTask executeCh chan func() validateCh chan func() } @@ -49,8 +51,10 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } // initialize scheduler queue - queue := NewSchedulerQueue(tasks, workers) - queue.AddAllTasksToExecutionQueue() + queue := NewTaskQueue(tasks) + + // send all tasks to queue + queue.ExecuteAll() active := atomic.Int32{} wg := sync.WaitGroup{} @@ -75,25 +79,31 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } else { // try one more validation of everything at end final.Store(true) - queue.ValidateTasksAfterIndex(-1) + queue.ValidateLaterTasks(-1) } } mx.Unlock() //TODO: remove once we feel good about this not hanging nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { - fmt.Println(fmt.Sprintf("worker=%d: active=%d", worker, active.Load())) + fmt.Println(fmt.Sprintf("worker=%d: active=%d, complete=%v", worker, active.Load(), queue.IsCompleted())) }) - t, ok := queue.NextTask() + task, anyTasks := queue.NextTask() nt() - if !ok { + if !anyTasks { return } active.Add(1) - if !s.processTask(t, ctx, queue, tasks) { - // if anything doesn't validate successfully, we will need a final re-sweep - final.Store(false) + + task.LockTask() + if taskType, ok := task.PopTaskType(); ok { + if !s.processTask(ctx, taskType, worker, task, queue) { + final.Store(false) + } + } else { + TaskLog(task, "NONE FOUND...SKIPPING") } + task.UnlockTask() active.Add(-1) } @@ -108,9 +118,9 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return collectResponses(tasks), nil } -func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *SchedulerQueue, tasks []*deliverTxTask) bool { - switch t.TaskType() { - case TypeValidation, TypeIdle: +func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) bool { + switch taskType { + case TypeValidation: TaskLog(t, "validate") s.validateTask(ctx, t) @@ -118,37 +128,48 @@ func (s *scheduler) processTask(t *deliverTxTask, ctx sdk.Context, queue *Schedu switch t.Status() { case statusValidated: // task is possibly finished (can be re-validated by others) - TaskLog(t, "VALIDATED (possibly finished)") - queue.SetToIdle(t.Index) + TaskLog(t, "*** VALIDATED ***") + // informs queue that it's complete (any subsequent submission for idx unsets this) + queue.FinishTask(t.Index) return true - case statusWaiting, statusExecuted: + case statusWaiting: // task should be re-validated (waiting on others) // how can we wait on dependencies? 
- queue.ReValidate(t.Index) + TaskLog(t, "waiting/executed...revalidating") + if queue.DependenciesFinished(t.Index) { + queue.Execute(t.Index) + } else { + queue.ReValidate(t.Index) + } case statusInvalid: // task should be re-executed along with all +1 tasks - queue.ReExecute(t.Index) - queue.ValidateTasksAfterIndex(t.Index) + TaskLog(t, "invalid (re-executing, re-validating > tx)") + queue.Execute(t.Index) default: TaskLog(t, "unexpected status") panic("unexpected status ") } case TypeExecution: - TaskLog(t, "execute") - t.LockTask() - s.prepareTask(t) + t.ResetForExecution() + TaskLog(t, fmt.Sprintf("execute (worker=%d)", w)) + s.executeTask(t) - if t.Status() == statusAborted { + if t.IsStatus(statusAborted) { + aborts.Add(1) + if aborts.Load() > 50 { + TaskLog(t, fmt.Sprintf("too many aborts, depending on: index=%d", t.Abort.DependentTxIdx)) + panic("too many aborts") + } queue.ReExecute(t.Index) + } else { - queue.ValidateExecutedTask(t.Index) - if t.Incarnation > 0 { - queue.ValidateTasksAfterIndex(t.Index) - } + aborts.Store(0) + queue.ValidateLaterTasks(t.Index) + TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) + queue.FinishExecute(t.Index) } - t.UnlockTask() default: TaskLog(t, "unexpected type") diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 08a89ccd9..49399c3c5 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -52,6 +52,68 @@ func initTestCtx(injectStores bool) sdk.Context { return ctx } +func TestExplicitOrdering(t *testing.T) { + tests := []struct { + name string + scenario func(s *scheduler, ctx sdk.Context, tasks []*TxTask) + }{ + { + name: "Test perfect order", + scenario: func(s *scheduler, ctx sdk.Context, tasks []*TxTask) { + // STARTING HERE + // reads nil, writes 0 + s.executeTask(tasks[0]) + s.validateTask(ctx, tasks[0]) + + // reads 0, writes 1 + s.executeTask(tasks[1]) + s.validateTask(ctx, tasks[1]) + + // reads the expected things + require.Equal(t, "", tasks[0].Response.Info) + require.Equal(t, "0", tasks[1].Response.Info) + + // both validated + require.Equal(t, statusValidated, tasks[0].status) + require.Equal(t, statusValidated, tasks[1].status) + }, + }, + } + for _, test := range tests { + deliverTx := func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + // all txs read and write to the same key to maximize conflicts + kv := ctx.MultiStore().GetKVStore(testStoreKey) + + val := string(kv.Get(itemKey)) + kv.Set(itemKey, []byte(fmt.Sprintf("%d", ctx.TxIndex()))) + + // return what was read from the store (final attempt should be index-1) + return types.ResponseDeliverTx{ + Info: val, + } + } + tp := trace.NewNoopTracerProvider() + otel.SetTracerProvider(trace.NewNoopTracerProvider()) + tr := tp.Tracer("scheduler-test") + ti := &tracing.Info{ + Tracer: &tr, + } + s := &scheduler{ + deliverTx: deliverTx, + tracingInfo: ti, + } + ctx := initTestCtx(true) + s.initMultiVersionStore(ctx) + + tasks := generateTasks(2) + for _, tsk := range tasks { + tsk.Ctx = ctx + } + test.scenario(s, ctx, tasks) + } + +} + func TestProcessAll(t *testing.T) { tests := []struct { name string @@ -96,8 +158,8 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 50, - runs: 50, + workers: 100, + runs: 1, addStores: true, requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { diff --git a/tasks/task.go b/tasks/task.go index e08f5b9c9..4602a6dea 100644 --- 
a/tasks/task.go +++ b/tasks/task.go @@ -1,12 +1,11 @@ package tasks import ( - "sync" - "github.com/cosmos/cosmos-sdk/store/multiversion" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/types/occ" "github.com/tendermint/tendermint/abci/types" + "sync" ) type status string @@ -30,14 +29,14 @@ const ( statusWaiting status = "waiting" ) -type deliverTxTask struct { - Ctx sdk.Context - AbortCh chan occ.Abort - rwMx sync.RWMutex - mx sync.Mutex - - taskType taskType +type TxTask struct { + Ctx sdk.Context + AbortCh chan occ.Abort + rwMx sync.RWMutex + mx sync.Mutex + taskType TaskType status status + ExecutionID string Dependencies []int Abort *occ.Abort Index int @@ -45,76 +44,81 @@ type deliverTxTask struct { Request types.RequestDeliverTx Response *types.ResponseDeliverTx VersionStores map[sdk.StoreKey]*multiversion.VersionIndexedStore - ValidateCh chan status } -func (dt *deliverTxTask) LockTask() { +func (dt *TxTask) LockTask() { dt.mx.Lock() } -func (dt *deliverTxTask) UnlockTask() { +func (dt *TxTask) UnlockTask() { dt.mx.Unlock() } -func (dt *deliverTxTask) SetTaskType(t taskType) { - dt.rwMx.Lock() - defer dt.rwMx.Unlock() - dt.taskType = t -} - -func (dt *deliverTxTask) IsIdle() bool { - return dt.IsTaskType(TypeIdle) -} - -func (dt *deliverTxTask) IsTaskType(t taskType) bool { - dt.rwMx.RLock() - defer dt.rwMx.RUnlock() - return dt.taskType == t -} - -func (dt *deliverTxTask) IsStatus(s status) bool { +func (dt *TxTask) IsStatus(s status) bool { dt.rwMx.RLock() defer dt.rwMx.RUnlock() return dt.status == s } -func (dt *deliverTxTask) TaskType() taskType { - dt.rwMx.RLock() - defer dt.rwMx.RUnlock() - return dt.taskType +func (dt *TxTask) SetTaskType(tt TaskType) bool { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + switch tt { + case TypeValidation: + if dt.taskType == TypeNone { + TaskLog(dt, "SCHEDULE task VALIDATION") + dt.taskType = tt + return true + } + case TypeExecution: + if dt.taskType != TypeExecution { + TaskLog(dt, "SCHEDULE task EXECUTION") + dt.taskType = tt + return true + } + } + return false +} + +func (dt *TxTask) PopTaskType() (TaskType, bool) { + dt.rwMx.Lock() + defer dt.rwMx.Unlock() + tt := dt.taskType + dt.taskType = TypeNone + return tt, tt != TypeNone } -func (dt *deliverTxTask) SetStatus(s status) { +func (dt *TxTask) SetStatus(s status) { dt.rwMx.Lock() defer dt.rwMx.Unlock() dt.status = s } -func (dt *deliverTxTask) Status() status { +func (dt *TxTask) Status() status { dt.rwMx.RLock() defer dt.rwMx.RUnlock() return dt.status } -func (dt *deliverTxTask) IsInvalid() bool { +func (dt *TxTask) IsInvalid() bool { dt.rwMx.RLock() defer dt.rwMx.RUnlock() return dt.status == statusInvalid || dt.status == statusAborted } -func (dt *deliverTxTask) IsValid() bool { +func (dt *TxTask) IsValid() bool { dt.rwMx.RLock() defer dt.rwMx.RUnlock() return dt.status == statusValidated } -func (dt *deliverTxTask) IsWaiting() bool { +func (dt *TxTask) IsWaiting() bool { dt.rwMx.RLock() defer dt.rwMx.RUnlock() return dt.status == statusWaiting } -func (dt *deliverTxTask) Reset() { +func (dt *TxTask) Reset() { dt.rwMx.Lock() defer dt.rwMx.Unlock() dt.status = statusPending @@ -125,11 +129,10 @@ func (dt *deliverTxTask) Reset() { dt.VersionStores = nil } -func (dt *deliverTxTask) ResetForExecution() { +func (dt *TxTask) ResetForExecution() { dt.rwMx.Lock() defer dt.rwMx.Unlock() dt.status = statusPending - dt.taskType = TypeExecution dt.Response = nil dt.Abort = nil dt.AbortCh = nil @@ -137,7 +140,6 @@ func (dt *deliverTxTask) ResetForExecution() { 
dt.VersionStores = nil } -func (dt *deliverTxTask) Increment() { +func (dt *TxTask) Increment() { dt.Incarnation++ - dt.ValidateCh = make(chan status, 1) } diff --git a/tasks/task_execution.go b/tasks/task_execution.go index 04da29711..f784a55ad 100644 --- a/tasks/task_execution.go +++ b/tasks/task_execution.go @@ -8,7 +8,7 @@ import ( ) // prepareTask initializes the context and version stores for a task -func (s *scheduler) prepareTask(task *deliverTxTask) { +func (s *scheduler) prepareTask(task *TxTask) { ctx := task.Ctx.WithTxIndex(task.Index) _, span := s.traceSpan(ctx, "SchedulerPrepare", task) @@ -42,11 +42,13 @@ func (s *scheduler) prepareTask(task *deliverTxTask) { } // executeTask executes a single task -func (s *scheduler) executeTask(task *deliverTxTask) { - dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerDeliverTx", task) +func (s *scheduler) executeTask(task *TxTask) { + dCtx, dSpan := s.traceSpan(task.Ctx, "SchedulerExecuteTask", task) defer dSpan.End() task.Ctx = dCtx + s.prepareTask(task) + resp := s.deliverTx(task.Ctx, task.Request) close(task.AbortCh) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index de834cc78..8bc7f088d 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -2,151 +2,244 @@ package tasks import ( "container/heap" + "fmt" + "sort" "sync" ) -type taskType int +type TaskType string const ( - TypeIdle taskType = iota - TypeExecution - TypeValidation + TypeNone TaskType = "NONE" + TypeExecution TaskType = "EXECUTE" + TypeValidation TaskType = "VALIDATE" ) -type SchedulerQueue struct { - mx sync.Mutex - cond *sync.Cond - once sync.Once +type Queue interface { + // NextTask returns the next task to be executed, or nil if the queue is closed. + NextTask() (*TxTask, bool) + // Close closes the queue, causing NextTask to return false. + Close() + // ExecuteAll executes all tasks in the queue. + ExecuteAll() + // Execute executes a task + Execute(idx int) + // ReExecute re-executes a task that just executed + ReExecute(idx int) + // ReValidate re-validates a task. + ReValidate(idx int) + // FinishExecute marks a task as finished executing. + FinishExecute(idx int) + // FinishTask marks a task as finished (only upon valid). + FinishTask(idx int) + // ValidateLaterTasks marks all tasks after the given index as pending validation. + ValidateLaterTasks(afterIdx int) + // IsCompleted returns true if all tasks have been executed and validated. 
+ IsCompleted() bool + // DependenciesFinished returns whether all dependencies are finished + DependenciesFinished(idx int) bool +} +type taskQueue struct { + mx sync.Mutex + condMx sync.Mutex + heapMx sync.Mutex + cond *sync.Cond + once sync.Once executing sync.Map - active sync.Map - tasks []*deliverTxTask + queued sync.Map + finished sync.Map + tasks []*TxTask queue *taskHeap - workers int closed bool } -func NewSchedulerQueue(tasks []*deliverTxTask, workers int) *SchedulerQueue { - sq := &SchedulerQueue{ - tasks: tasks, - queue: &taskHeap{}, - workers: workers, +func NewTaskQueue(tasks []*TxTask) Queue { + sq := &taskQueue{ + tasks: tasks, + queue: &taskHeap{}, } - sq.cond = sync.NewCond(&sq.mx) + sq.cond = sync.NewCond(&sq.condMx) return sq } -func (sq *SchedulerQueue) Lock() { +func (sq *taskQueue) lock() { sq.mx.Lock() } -func (sq *SchedulerQueue) Unlock() { +func (sq *taskQueue) unlock() { sq.mx.Unlock() } -func (sq *SchedulerQueue) SetToIdle(idx int) { - sq.Lock() - defer sq.Unlock() - sq.tasks[idx].SetTaskType(TypeIdle) - sq.active.Delete(idx) +func (sq *taskQueue) execute(idx int) { + if sq.tasks[idx].SetTaskType(TypeExecution) { + TaskLog(sq.tasks[idx], "-> execute") + sq.finished.Delete(idx) + sq.executing.Store(idx, struct{}{}) + sq.pushTask(idx, TypeExecution) + } } -func (sq *SchedulerQueue) ReExecute(idx int) { - sq.Lock() - defer sq.Unlock() +func (sq *taskQueue) validate(idx int) { + if sq.isExecuting(idx) { + TaskLog(sq.tasks[idx], "(skip validating, executing...)") + return + } + if sq.tasks[idx].SetTaskType(TypeValidation) { + TaskLog(sq.tasks[idx], "-> validate") + sq.pushTask(idx, TypeValidation) + } +} - TaskLog(sq.tasks[idx], "-> re-execute") +func (sq *taskQueue) isQueued(idx int) bool { + _, ok := sq.queued.Load(idx) + return ok +} - sq.tasks[idx].ResetForExecution() - sq.pushTask(idx) +func (sq *taskQueue) isExecuting(idx int) bool { + _, ok := sq.executing.Load(idx) + return ok } -// ReValidate is a helper method that revalidates a task -// without making it eligible for other workers to request it to validate -func (sq *SchedulerQueue) ReValidate(idx int) { - sq.Lock() - defer sq.Unlock() +// FinishExecute marks a task as finished executing and transitions directly validation +func (sq *taskQueue) FinishExecute(idx int) { + sq.lock() + defer sq.unlock() + + TaskLog(sq.tasks[idx], fmt.Sprintf("-> finish task execute (%d)", sq.tasks[idx].Incarnation)) - if !sq.tasks[idx].IsTaskType(TypeValidation) { - panic("trying to re-validate a task not in validation state") + if !sq.isExecuting(idx) { + TaskLog(sq.tasks[idx], "not executing, but trying to finish execute") + panic("not executing, but trying to finish execute") } - TaskLog(sq.tasks[idx], "-> re-validate") - sq.tasks[idx].Abort = nil - sq.pushTask(idx) + sq.executing.Delete(idx) + sq.validate(idx) } -func (sq *SchedulerQueue) IsCompleted() bool { - sq.Lock() - defer sq.Unlock() +// FinishTask marks a task as finished if nothing else queued it +// this drives whether the queue thinks everything is done processing +func (sq *taskQueue) FinishTask(idx int) { + sq.lock() + defer sq.unlock() - if len(*sq.queue) == 0 { - for _, t := range sq.tasks { - if !t.IsValid() || !t.IsIdle() { - TaskLog(t, "not valid or not idle") - return false - } - } - return true + TaskLog(sq.tasks[idx], "FinishTask -> task is FINISHED (for now)") + + sq.finished.Store(idx, struct{}{}) +} + +// ReValidate re-validates a task (back to queue from validation) +func (sq *taskQueue) ReValidate(idx int) { + sq.lock() + defer sq.unlock() + + 
if sq.isExecuting(idx) { + TaskLog(sq.tasks[idx], "task is executing (unexpected)") + panic("cannot re-validate an executing task") } - return false + + sq.validate(idx) } -// ValidateExecutedTask adds a task to the validation queue IFF it just executed -// this allows us to transition to validation without making it eligible for something else -// to add it to validation -func (sq *SchedulerQueue) ValidateExecutedTask(idx int) { - sq.Lock() - defer sq.Unlock() +func (sq *taskQueue) Execute(idx int) { + sq.lock() + defer sq.unlock() + + TaskLog(sq.tasks[idx], fmt.Sprintf("-> Execute (%d)", sq.tasks[idx].Incarnation)) - if _, ok := sq.active.Load(idx); !ok { - TaskLog(sq.tasks[idx], "not in execution") - panic("trying to validate a task not in execution") + if sq.isExecuting(idx) { + TaskLog(sq.tasks[idx], "task is executing (unexpected)") + panic("cannot execute an executing task") } - TaskLog(sq.tasks[idx], "-> validate") - sq.tasks[idx].SetTaskType(TypeValidation) - sq.pushTask(idx) + + sq.tasks[idx].Increment() + sq.execute(idx) } -func (sq *SchedulerQueue) ValidateTasksAfterIndex(afterIdx int) { - sq.Lock() - defer sq.Unlock() +// ReExecute re-executes a task (back to queue from execution) +func (sq *taskQueue) ReExecute(idx int) { + sq.lock() + defer sq.unlock() + + TaskLog(sq.tasks[idx], fmt.Sprintf("-> RE-execute (%d)", sq.tasks[idx].Incarnation)) + + if !sq.isExecuting(idx) { + TaskLog(sq.tasks[idx], "task is not executing (unexpected)") + panic("cannot re-execute a non-executing task") + } + + sq.tasks[idx].Increment() + sq.execute(idx) +} + +// ValidateLaterTasks marks all tasks after the given index as pending validation. +// any executing tasks are skipped +func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { + sq.lock() + defer sq.unlock() for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { - // already active + sq.validate(idx) + } +} + +func (sq *taskQueue) isFinished(idx int) bool { + _, ok := sq.finished.Load(idx) + return ok && sq.tasks[idx].IsStatus(statusValidated) +} - if _, ok := sq.active.Load(idx); ok { - continue +func (sq *taskQueue) DependenciesFinished(idx int) bool { + for _, dep := range sq.tasks[idx].Dependencies { + if !sq.isFinished(dep) { + return false } + } + return true +} - TaskLog(sq.tasks[idx], "-> validate") - sq.tasks[idx].SetStatus(statusExecuted) - sq.tasks[idx].SetTaskType(TypeValidation) - sq.pushTask(idx) +// IsCompleted returns true if all tasks are "finished" +func (sq *taskQueue) IsCompleted() bool { + sq.lock() + defer sq.unlock() + + if len(*sq.queue) == 0 { + for _, t := range sq.tasks { + if !sq.isFinished(t.Index) { + TaskLog(t, "not finished yet") + return false + } + } + return true } + return false } -func (sq *SchedulerQueue) pushTask(idx int) { - sq.active.Store(idx, struct{}{}) +func (sq *taskQueue) pushTask(idx int, taskType TaskType) { + sq.condMx.Lock() + defer sq.condMx.Unlock() + sq.queued.Store(idx, struct{}{}) + TaskLog(sq.tasks[idx], fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.tasks[idx].Incarnation)) heap.Push(sq.queue, idx) sq.cond.Broadcast() } -func (sq *SchedulerQueue) AddAllTasksToExecutionQueue() { - sq.Lock() - defer sq.Unlock() +// ExecuteAll executes all tasks in the queue (called to start processing) +func (sq *taskQueue) ExecuteAll() { + sq.lock() + defer sq.unlock() for idx := range sq.tasks { - TaskLog(sq.tasks[idx], "-> execute") - sq.tasks[idx].SetTaskType(TypeExecution) - sq.pushTask(idx) + sq.execute(idx) } } -func (sq *SchedulerQueue) NextTask() (*deliverTxTask, bool) { - sq.Lock() - defer 
sq.Unlock() +// NextTask returns the next task to be executed, or nil if the queue is closed. +// this hangs if no tasks are ready because it's possible a new task might arrive +// closing the queue causes NextTask to return false immediately +func (sq *taskQueue) NextTask() (*TxTask, bool) { + sq.condMx.Lock() + defer sq.condMx.Unlock() for len(*sq.queue) == 0 && !sq.closed { sq.cond.Wait() @@ -156,14 +249,24 @@ func (sq *SchedulerQueue) NextTask() (*deliverTxTask, bool) { return nil, false } + sq.heapMx.Lock() idx := heap.Pop(sq.queue).(int) - return sq.tasks[idx], true + sq.heapMx.Unlock() + + defer sq.queued.Delete(idx) + + res := sq.tasks[idx] + + TaskLog(res, fmt.Sprintf("<- POP task (%d)", res.Incarnation)) + + return res, true } -func (sq *SchedulerQueue) Close() { +// Close closes the queue, causing NextTask to return false. +func (sq *taskQueue) Close() { sq.once.Do(func() { - sq.Lock() - defer sq.Unlock() + sq.condMx.Lock() + defer sq.condMx.Unlock() sq.closed = true sq.cond.Broadcast() }) @@ -176,7 +279,16 @@ func (h taskHeap) Less(i, j int) bool { return h[i] < h[j] } func (h taskHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } func (h *taskHeap) Push(x interface{}) { + // Check if the integer already exists in the heap + for _, item := range *h { + if item == x.(int) { + return + } + } + // If it doesn't exist, append it *h = append(*h, x.(int)) + // Sort the heap + sort.Ints(*h) } func (h *taskHeap) Pop() interface{} { diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index 572633067..22686afd8 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -1,73 +1,115 @@ package tasks import ( + "container/heap" + "github.com/stretchr/testify/assert" "testing" ) -func generateTasks(count int) []*deliverTxTask { - var res []*deliverTxTask +func generateTasks(count int) []*TxTask { + var res []*TxTask for i := 0; i < count; i++ { - res = append(res, &deliverTxTask{Index: i}) + res = append(res, &TxTask{Index: i}) } return res } -func TestNewSchedulerQueue(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) +func assertExecuting(t *testing.T, task *TxTask) { - if len(sq.tasks) != len(tasks) { - t.Errorf("Expected tasks length %d, but got %d", len(tasks), len(sq.tasks)) - } + assert.True(t, task.taskType == TypeExecution) } -func TestSetToIdle(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) - - sq.AddAllTasksToExecutionQueue() - sq.SetToIdle(1) - - if !sq.tasks[1].IsTaskType(TypeIdle) { - t.Errorf("Expected task type %d, but got %d", TypeIdle, sq.tasks[1].TaskType()) - } +func assertValidating(t *testing.T, task *TxTask) { + assert.True(t, task.taskType == TypeValidation) } -func TestNextTask(t *testing.T) { +func testQueue() (Queue, []*TxTask) { tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) - - sq.AddAllTasksToExecutionQueue() - task, _ := sq.NextTask() - - if task != sq.tasks[1] { - t.Errorf("Expected task %v, but got %v", sq.tasks[1], task) - } + return NewTaskQueue(tasks), tasks } -func TestClose(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) +func TestSchedulerQueue(t *testing.T) { + queue, tasks := testQueue() - sq.Close() + // Test ExecuteAll + queue.ExecuteAll() + for _, task := range tasks { + assertExecuting(t, task) + } - if sq.closed != true { - t.Errorf("Expected closed to be true, but got %v", sq.closed) + // Test NextTask + nextTask, ok := queue.NextTask() + assert.True(t, ok) + assert.Equal(t, tasks[0], nextTask) + + // Test 
Close + queue.Close() + _, ok = queue.NextTask() + assert.False(t, ok) + + // Test FinishExecute leads to Validation + queue, tasks = testQueue() + queue.ExecuteAll() + nextTask, ok = queue.NextTask() + assert.True(t, ok) + queue.FinishExecute(nextTask.Index) + assertValidating(t, nextTask) + + // Test Execute->ReExecute leads to Execution + queue, tasks = testQueue() + queue.ExecuteAll() + nextTask, ok = queue.NextTask() + assert.True(t, ok) + queue.ReExecute(nextTask.Index) + assertExecuting(t, nextTask) + + // Test that validation doesn't happen for executing task + queue, tasks = testQueue() + queue.ExecuteAll() + queue.ValidateLaterTasks(-1) + nextTask, ok = queue.NextTask() + assert.True(t, ok) + assertExecuting(t, nextTask) // still executing + + // Test that validation happens for finished tasks + queue, tasks = testQueue() + queue.ExecuteAll() + queue.ValidateLaterTasks(-1) + nextTask, ok = queue.NextTask() + assert.True(t, ok) + assertExecuting(t, nextTask) + + // Test IsCompleted + queue, tasks = testQueue() + queue.ExecuteAll() + + for idx, task := range tasks { + task.SetStatus(statusValidated) + queue.NextTask() + queue.FinishTask(idx) + if idx == len(tasks)-1 { + queue.Close() + } } + assert.True(t, queue.IsCompleted()) } -func TestNextTaskOrder(t *testing.T) { - tasks := generateTasks(10) - sq := NewSchedulerQueue(tasks, 5) +func TestTaskHeap(t *testing.T) { + h := &taskHeap{} + heap.Init(h) - // Add tasks in non-sequential order - sq.AddAllTasksToExecutionQueue() - tsk, _ := sq.NextTask() - sq.ReExecute(tsk.Index) + // Test Push + heap.Push(h, 3) + heap.Push(h, 1) + heap.Push(h, 2) + heap.Push(h, 1) // Duplicate, should not be added - // The task with the lowest index should be returned first - task, _ := sq.NextTask() - if task != sq.tasks[1] { - t.Errorf("Expected task %v, but got %v", sq.tasks[1], task) - } + assert.Equal(t, 3, h.Len(), "Heap should contain 3 items") + + // Test Pop + assert.Equal(t, 1, heap.Pop(h), "First pop should return the smallest element") + assert.Equal(t, 2, heap.Pop(h), "Second pop should return the next smallest element") + assert.Equal(t, 3, heap.Pop(h), "Third pop should return the largest element") + + assert.Equal(t, 0, h.Len(), "Heap should be empty after all elements are popped") } diff --git a/tasks/task_validation.go b/tasks/task_validation.go index 5e1f66769..0c8d303d9 100644 --- a/tasks/task_validation.go +++ b/tasks/task_validation.go @@ -5,7 +5,7 @@ import ( "sort" ) -func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { +func (s *scheduler) findConflicts(task *TxTask) (bool, []int) { var conflicts []int uniq := make(map[int]struct{}) valid := true @@ -24,7 +24,7 @@ func (s *scheduler) findConflicts(task *deliverTxTask) (bool, []int) { return valid, conflicts } -func (s *scheduler) invalidateTask(task *deliverTxTask) { +func (s *scheduler) invalidateTask(task *TxTask) { for _, mv := range s.multiVersionStores { mv.InvalidateWriteset(task.Index, task.Incarnation) mv.ClearReadset(task.Index) @@ -32,27 +32,28 @@ func (s *scheduler) invalidateTask(task *deliverTxTask) { } } -func (s *scheduler) validateTask(ctx sdk.Context, task *deliverTxTask) bool { - // avoids validation races WITHIN a task - task.LockTask() - defer task.UnlockTask() - +func (s *scheduler) validateTask(ctx sdk.Context, task *TxTask) { _, span := s.traceSpan(ctx, "SchedulerValidate", task) defer span.End() - if valid, conflicts := s.findConflicts(task); !valid { + valid, conflicts := s.findConflicts(task) + task.Dependencies = conflicts + + if 
!valid { s.invalidateTask(task) - task.SetStatus(statusInvalid) if len(conflicts) > 0 { - task.Dependencies = conflicts + task.SetStatus(statusWaiting) + return } - return false - } else if len(conflicts) == 0 { - // mark as validated, which will avoid re-validating unless a lower-index re-validates - task.SetStatus(statusValidated) - return true - } else { - task.Dependencies = conflicts + task.SetStatus(statusInvalid) + return } - return false + + if len(conflicts) > 0 { + task.SetStatus(statusWaiting) + return + } + + task.SetStatus(statusValidated) + } diff --git a/tasks/utils.go b/tasks/utils.go index 31f891a0e..a3a4e1ff7 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -14,9 +14,9 @@ import ( ) // TODO: remove after things work -func TaskLog(task *deliverTxTask, msg string) { +func TaskLog(task *TxTask, msg string) { // helpful for debugging state transitions - //fmt.Println(fmt.Sprintf("Task(%d\t%s):\t%s", task.Index, task.Status, msg)) + //fmt.Println(fmt.Sprintf("%d: Task(%d/%s/%d):\t%s", time.Now().UnixMicro(), task.Index, task.status, task.Incarnation, msg)) } // TODO: remove after things work @@ -40,7 +40,7 @@ func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { return cancel } -func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) (sdk.Context, trace.Span) { +func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *TxTask) (sdk.Context, trace.Span) { spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) if task != nil { span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) @@ -51,21 +51,20 @@ func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *deliverTxTask) return ctx, span } -func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*deliverTxTask { - res := make([]*deliverTxTask, 0, len(reqs)) +func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*TxTask { + res := make([]*TxTask, 0, len(reqs)) for idx, r := range reqs { - res = append(res, &deliverTxTask{ - Request: r.Request, - Index: idx, - Ctx: ctx, - status: statusPending, - ValidateCh: make(chan status, 1), + res = append(res, &TxTask{ + Request: r.Request, + Index: idx, + Ctx: ctx, + status: statusPending, }) } return res } -func collectResponses(tasks []*deliverTxTask) []types.ResponseDeliverTx { +func collectResponses(tasks []*TxTask) []types.ResponseDeliverTx { res := make([]types.ResponseDeliverTx, 0, len(tasks)) for _, t := range tasks { res = append(res, *t.Response) From 612547dc5812fa4244c236f209fb60253afde1a1 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 23:31:39 -0500 Subject: [PATCH 36/65] remove abort limiter --- tasks/scheduler.go | 8 -------- tasks/task_queue.go | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index b7a04c836..ffa53d838 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -16,8 +16,6 @@ type Scheduler interface { ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) } -var aborts = atomic.Int32{} - type scheduler struct { deliverTx func(ctx sdk.Context, req types.RequestDeliverTx) (res types.ResponseDeliverTx) workers int @@ -157,15 +155,9 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx s.executeTask(t) if t.IsStatus(statusAborted) { - aborts.Add(1) - if aborts.Load() > 50 { - TaskLog(t, fmt.Sprintf("too many aborts, depending on: index=%d", t.Abort.DependentTxIdx)) - panic("too many 
aborts") - } queue.ReExecute(t.Index) } else { - aborts.Store(0) queue.ValidateLaterTasks(t.Index) TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 8bc7f088d..a8ac67f7b 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -107,7 +107,7 @@ func (sq *taskQueue) FinishExecute(idx int) { sq.lock() defer sq.unlock() - TaskLog(sq.tasks[idx], fmt.Sprintf("-> finish task execute (%d)", sq.tasks[idx].Incarnation)) + TaskLog(sq.tasks[idx], "-> finish task execute") if !sq.isExecuting(idx) { TaskLog(sq.tasks[idx], "not executing, but trying to finish execute") From ab828e02734478bd562ef3a24aae1151baf51339 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Wed, 29 Nov 2023 23:33:16 -0500 Subject: [PATCH 37/65] remove validate all... --- tasks/scheduler.go | 12 +++--------- tasks/scheduler_test.go | 4 ++-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index ffa53d838..25bbc666d 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -70,15 +70,9 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // check if all tasks are complete AND not running anything mx.Lock() if active.Load() == 0 && queue.IsCompleted() { - if final.Load() { - finisher.Do(func() { - queue.Close() - }) - } else { - // try one more validation of everything at end - final.Store(true) - queue.ValidateLaterTasks(-1) - } + finisher.Do(func() { + queue.Close() + }) } mx.Unlock() diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 49399c3c5..c82df4c7d 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -158,8 +158,8 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 100, - runs: 1, + workers: 50, + runs: 50, addStores: true, requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { From 4be4b86a17f6db20883d07d6e8dd9f22d2d42dab Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 30 Nov 2023 00:23:04 -0500 Subject: [PATCH 38/65] cleanup --- tasks/scheduler.go | 48 ++++++++++++++--------------------------- tasks/scheduler_test.go | 10 ++++----- tasks/task_queue.go | 16 ++++++-------- tasks/utils.go | 2 +- 4 files changed, 29 insertions(+), 47 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 25bbc666d..305f9f45a 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -2,13 +2,12 @@ package tasks import ( "fmt" - "sync" - "sync/atomic" - "github.com/cosmos/cosmos-sdk/store/multiversion" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/tendermint/tendermint/abci/types" + "sync" + "sync/atomic" ) // Scheduler processes tasks concurrently @@ -54,12 +53,9 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // send all tasks to queue queue.ExecuteAll() - active := atomic.Int32{} wg := sync.WaitGroup{} wg.Add(workers) - final := atomic.Bool{} - finisher := sync.Once{} - mx := sync.Mutex{} + count := atomic.Int32{} for i := 0; i < workers; i++ { go func(worker int) { @@ -68,35 +64,23 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t for { // check if all tasks are complete AND not running anything - mx.Lock() - if active.Load() == 0 && queue.IsCompleted() { - finisher.Do(func() { - queue.Close() - }) + if queue.IsCompleted() { + 
queue.Close() } - mx.Unlock() - //TODO: remove once we feel good about this not hanging - nt := waitWithMsg(fmt.Sprintf("worker=%d: next task...", worker), func() { - fmt.Println(fmt.Sprintf("worker=%d: active=%d, complete=%v", worker, active.Load(), queue.IsCompleted())) - }) task, anyTasks := queue.NextTask() - nt() if !anyTasks { return } - active.Add(1) task.LockTask() - if taskType, ok := task.PopTaskType(); ok { - if !s.processTask(ctx, taskType, worker, task, queue) { - final.Store(false) - } + if tt, ok := task.PopTaskType(); ok { + count.Add(1) + s.processTask(ctx, tt, worker, task, queue) } else { TaskLog(task, "NONE FOUND...SKIPPING") } task.UnlockTask() - active.Add(-1) } }(i) @@ -104,6 +88,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t wg.Wait() + fmt.Println("count", count.Load()) + for _, mv := range s.multiVersionStores { mv.WriteLatestToStore() } @@ -113,11 +99,12 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) bool { switch taskType { case TypeValidation: - TaskLog(t, "validate") + TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) + s.validateTask(ctx, t) // check the outcome of validation and do things accordingly - switch t.Status() { + switch t.status { case statusValidated: // task is possibly finished (can be re-validated by others) TaskLog(t, "*** VALIDATED ***") @@ -130,11 +117,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "waiting/executed...revalidating") if queue.DependenciesFinished(t.Index) { queue.Execute(t.Index) - } else { - queue.ReValidate(t.Index) } case statusInvalid: - // task should be re-executed along with all +1 tasks TaskLog(t, "invalid (re-executing, re-validating > tx)") queue.Execute(t.Index) default: @@ -144,17 +128,17 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx case TypeExecution: t.ResetForExecution() - TaskLog(t, fmt.Sprintf("execute (worker=%d)", w)) + TaskLog(t, fmt.Sprintf("TypeExecution (worker=%d)", w)) s.executeTask(t) if t.IsStatus(statusAborted) { queue.ReExecute(t.Index) - } else { - queue.ValidateLaterTasks(t.Index) TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) + //TODO: speed this up, too slow to do every time + queue.ValidateLaterTasks(t.Index) } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index c82df4c7d..0f342efa2 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -127,10 +127,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 5, + workers: 50, runs: 1, addStores: true, - requests: requestList(5), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 50, - runs: 50, + workers: 500, + runs: 1, addStores: true, - requests: requestList(100), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go 
b/tasks/task_queue.go index a8ac67f7b..65730a385 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -46,7 +46,7 @@ type taskQueue struct { heapMx sync.Mutex cond *sync.Cond once sync.Once - executing sync.Map + executing map[int]struct{} queued sync.Map finished sync.Map tasks []*TxTask @@ -56,8 +56,9 @@ type taskQueue struct { func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ - tasks: tasks, - queue: &taskHeap{}, + tasks: tasks, + queue: &taskHeap{}, + executing: make(map[int]struct{}), } sq.cond = sync.NewCond(&sq.condMx) @@ -76,7 +77,7 @@ func (sq *taskQueue) execute(idx int) { if sq.tasks[idx].SetTaskType(TypeExecution) { TaskLog(sq.tasks[idx], "-> execute") sq.finished.Delete(idx) - sq.executing.Store(idx, struct{}{}) + sq.executing[idx] = struct{}{} sq.pushTask(idx, TypeExecution) } } @@ -98,7 +99,7 @@ func (sq *taskQueue) isQueued(idx int) bool { } func (sq *taskQueue) isExecuting(idx int) bool { - _, ok := sq.executing.Load(idx) + _, ok := sq.executing[idx] return ok } @@ -114,7 +115,7 @@ func (sq *taskQueue) FinishExecute(idx int) { panic("not executing, but trying to finish execute") } - sq.executing.Delete(idx) + delete(sq.executing, idx) sq.validate(idx) } @@ -200,9 +201,6 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { // IsCompleted returns true if all tasks are "finished" func (sq *taskQueue) IsCompleted() bool { - sq.lock() - defer sq.unlock() - if len(*sq.queue) == 0 { for _, t := range sq.tasks { if !sq.isFinished(t.Index) { diff --git a/tasks/utils.go b/tasks/utils.go index a3a4e1ff7..7650062c4 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -20,7 +20,7 @@ func TaskLog(task *TxTask, msg string) { } // TODO: remove after things work -// waitWithMsg prints a message every 1s, so we can tell what's hanging +// waitWithMsg prints a message every 1s if not cancelled (for hang situations) func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { goctx, cancel := context.WithCancel(context.Background()) tick := time.NewTicker(1 * time.Second) From e613b6d16c0c2bc4567bf9eac44a81f0f610be7b Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 30 Nov 2023 13:23:55 -0500 Subject: [PATCH 39/65] add timer (to remove later) --- tasks/scheduler.go | 216 +++++++++++++++++++++++---------------- tasks/scheduler_test.go | 8 +- tasks/task_queue.go | 107 ++++++++++--------- tasks/task_queue_test.go | 8 -- tasks/timer.go | 134 ++++++++++++++++++++++++ tasks/utils.go | 24 ----- 6 files changed, 322 insertions(+), 175 deletions(-) create mode 100644 tasks/timer.go diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 305f9f45a..5b758154a 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -23,6 +23,7 @@ type scheduler struct { allTasks []*TxTask executeCh chan func() validateCh chan func() + timer *Timer } // NewScheduler creates a new scheduler @@ -31,119 +32,154 @@ func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx workers: workers, deliverTx: deliverTxFunc, tracingInfo: tracingInfo, + timer: NewTimer("Scheduler"), } } -func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { - // initialize mutli-version stores - s.initMultiVersionStore(ctx) - // prefill estimates - s.PrefillEstimates(reqs) - tasks := toTasks(ctx, reqs) - s.allTasks = tasks - - workers := s.workers - if s.workers < 1 { - workers = len(tasks) - } - - // initialize scheduler queue - queue := NewTaskQueue(tasks) - - // send all tasks to queue - queue.ExecuteAll() - - wg := 
sync.WaitGroup{} - wg.Add(workers) - count := atomic.Int32{} - - for i := 0; i < workers; i++ { - go func(worker int) { - defer wg.Done() +func (s *scheduler) WithTimer(name string, work func()) { + id := s.timer.Start(name) + work() + s.timer.End(name, id) +} - for { +func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { + var results []types.ResponseDeliverTx + var err error + s.WithTimer("ProcessAll", func() { + pas := s.timer.Start("ProcessAll-Setup") + // initialize mutli-version stores + s.initMultiVersionStore(ctx) + // prefill estimates + s.PrefillEstimates(reqs) + tasks := toTasks(ctx, reqs) + s.allTasks = tasks + + workers := s.workers + if s.workers < 1 { + workers = len(tasks) + } - // check if all tasks are complete AND not running anything - if queue.IsCompleted() { - queue.Close() - } + // initialize scheduler queue + queue := NewTaskQueue(tasks) + + // send all tasks to queue + go queue.ExecuteAll() + + wg := sync.WaitGroup{} + wg.Add(workers) + count := atomic.Int32{} + + s.timer.End("ProcessAll-Setup", pas) + for i := 0; i < workers; i++ { + go func(worker int) { + defer wg.Done() + + for { + + s.WithTimer("IsCompleted()", func() { + if queue.IsCompleted() { + queue.Close() + } + }) + + var task *TxTask + var anyTasks bool + s.WithTimer("NextTask()", func() { + task, anyTasks = queue.NextTask() + }) + if !anyTasks { + return + } + + s.WithTimer("IsCompleted()", func() { + task.LockTask() + var tt TaskType + var ok bool + s.WithTimer("PopTaskType()", func() { + tt, ok = task.PopTaskType() + }) + if ok { + count.Add(1) + s.WithTimer("processTask()", func() { + s.processTask(ctx, tt, worker, task, queue) + }) + } else { + TaskLog(task, "NONE FOUND...SKIPPING") + } + task.UnlockTask() + }) - task, anyTasks := queue.NextTask() - if !anyTasks { - return } - task.LockTask() - if tt, ok := task.PopTaskType(); ok { - count.Add(1) - s.processTask(ctx, tt, worker, task, queue) - } else { - TaskLog(task, "NONE FOUND...SKIPPING") - } - task.UnlockTask() - } + }(i) + } - }(i) - } + wg.Wait() - wg.Wait() + fmt.Println("count", count.Load()) - fmt.Println("count", count.Load()) + for _, mv := range s.multiVersionStores { + mv.WriteLatestToStore() + } + results = collectResponses(tasks) + err = nil + }) + s.timer.PrintReport() - for _, mv := range s.multiVersionStores { - mv.WriteLatestToStore() - } - return collectResponses(tasks), nil + return results, err } -func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) bool { +func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) { switch taskType { case TypeValidation: - TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) - - s.validateTask(ctx, t) - - // check the outcome of validation and do things accordingly - switch t.status { - case statusValidated: - // task is possibly finished (can be re-validated by others) - TaskLog(t, "*** VALIDATED ***") - // informs queue that it's complete (any subsequent submission for idx unsets this) - queue.FinishTask(t.Index) - return true - case statusWaiting: - // task should be re-validated (waiting on others) - // how can we wait on dependencies? 
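// [Editor's note, not part of the patch] In this series the answer to the question
// above appears to be the queue's DependenciesFinished(idx): validateTask records the
// conflicting lower-indexed transactions in task.Dependencies, and a waiting task is
// only re-queued for execution once each dependency is both finished and validated
// (see isFinished). Until then it seems to rely on a later ValidateLaterTasks pass to
// be picked up again.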
- TaskLog(t, "waiting/executed...revalidating") - if queue.DependenciesFinished(t.Index) { + s.WithTimer("TypeValidation", func() { + TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) + + s.validateTask(ctx, t) + + // check the outcome of validation and do things accordingly + switch t.status { + case statusValidated: + // task is possibly finished (can be re-validated by others) + TaskLog(t, "*** VALIDATED ***") + // informs queue that it's complete (any subsequent submission for idx unsets this) + queue.FinishTask(t.Index) + return + case statusWaiting: + // task should be re-validated (waiting on others) + // how can we wait on dependencies? + TaskLog(t, "waiting/executed...revalidating") + if queue.DependenciesFinished(t.Index) { + queue.Execute(t.Index) + } + case statusInvalid: + TaskLog(t, "invalid (re-executing, re-validating > tx)") queue.Execute(t.Index) + default: + TaskLog(t, "unexpected status") + panic("unexpected status ") } - case statusInvalid: - TaskLog(t, "invalid (re-executing, re-validating > tx)") - queue.Execute(t.Index) - default: - TaskLog(t, "unexpected status") - panic("unexpected status ") - } + }) case TypeExecution: - t.ResetForExecution() - TaskLog(t, fmt.Sprintf("TypeExecution (worker=%d)", w)) - - s.executeTask(t) - - if t.IsStatus(statusAborted) { - queue.ReExecute(t.Index) - } else { - TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) - queue.FinishExecute(t.Index) - //TODO: speed this up, too slow to do every time - queue.ValidateLaterTasks(t.Index) - } + s.WithTimer("TypeExecution", func() { + t.ResetForExecution() + TaskLog(t, fmt.Sprintf("TypeExecution (worker=%d)", w)) + + s.executeTask(t) + + if t.IsStatus(statusAborted) { + queue.Execute(t.Index) + } else { + TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) + queue.FinishExecute(t.Index) + //TODO: speed this up, too slow to do every time + queue.ValidateLaterTasks(t.Index) + } + }) default: TaskLog(t, "unexpected type") panic("unexpected type") } - return false } diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 0f342efa2..a59ab4f20 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -127,10 +127,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 50, + workers: 500, runs: 1, addStores: true, - requests: requestList(1000), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 500, + workers: 50, runs: 1, addStores: true, - requests: requestList(10000), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 65730a385..e6cb6ed2a 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -24,8 +24,6 @@ type Queue interface { ExecuteAll() // Execute executes a task Execute(idx int) - // ReExecute re-executes a task that just executed - ReExecute(idx int) // ReValidate re-validates a task. ReValidate(idx int) // FinishExecute marks a task as finished executing. 
@@ -41,23 +39,25 @@ type Queue interface { } type taskQueue struct { - mx sync.Mutex - condMx sync.Mutex - heapMx sync.Mutex - cond *sync.Cond - once sync.Once - executing map[int]struct{} - queued sync.Map - finished sync.Map - tasks []*TxTask - queue *taskHeap - closed bool + lockTimerID string + mx sync.Mutex + condMx sync.Mutex + heapMx sync.Mutex + cond *sync.Cond + once sync.Once + executing map[int]struct{} + finished sync.Map + tasks []*TxTask + queue *taskHeap + timer *Timer + closed bool } func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ tasks: tasks, queue: &taskHeap{}, + timer: NewTimer("Queue"), executing: make(map[int]struct{}), } sq.cond = sync.NewCond(&sq.condMx) @@ -93,11 +93,6 @@ func (sq *taskQueue) validate(idx int) { } } -func (sq *taskQueue) isQueued(idx int) bool { - _, ok := sq.queued.Load(idx) - return ok -} - func (sq *taskQueue) isExecuting(idx int) bool { _, ok := sq.executing[idx] return ok @@ -105,8 +100,15 @@ func (sq *taskQueue) isExecuting(idx int) bool { // FinishExecute marks a task as finished executing and transitions directly validation func (sq *taskQueue) FinishExecute(idx int) { + id := sq.timer.Start("FinishExecute") + defer sq.timer.End("FinishExecute", id) + + id2 := sq.timer.Start("FinishExecute-LOCK") sq.lock() - defer sq.unlock() + defer func() { + sq.unlock() + sq.timer.End("FinishExecute-LOCK", id2) + }() TaskLog(sq.tasks[idx], "-> finish task execute") @@ -122,8 +124,14 @@ func (sq *taskQueue) FinishExecute(idx int) { // FinishTask marks a task as finished if nothing else queued it // this drives whether the queue thinks everything is done processing func (sq *taskQueue) FinishTask(idx int) { + id := sq.timer.Start("FinishTask") + defer sq.timer.End("FinishTask", id) + id2 := sq.timer.Start("FinishTask-LOCK") sq.lock() - defer sq.unlock() + defer func() { + sq.unlock() + sq.timer.End("FinishTask-LOCK", id2) + }() TaskLog(sq.tasks[idx], "FinishTask -> task is FINISHED (for now)") @@ -132,6 +140,8 @@ func (sq *taskQueue) FinishTask(idx int) { // ReValidate re-validates a task (back to queue from validation) func (sq *taskQueue) ReValidate(idx int) { + id := sq.timer.Start("ReValidate") + defer sq.timer.End("ReValidate", id) sq.lock() defer sq.unlock() @@ -144,31 +154,19 @@ func (sq *taskQueue) ReValidate(idx int) { } func (sq *taskQueue) Execute(idx int) { + id := sq.timer.Start("Execute-full") + defer sq.timer.End("Execute-full", id) + id3 := sq.timer.Start("Execute-LOCK") sq.lock() - defer sq.unlock() + defer func() { + sq.unlock() + sq.timer.End("Execute-LOCK", id3) + }() - TaskLog(sq.tasks[idx], fmt.Sprintf("-> Execute (%d)", sq.tasks[idx].Incarnation)) + id2 := sq.timer.Start("Execute-logic") + defer sq.timer.End("Execute-logic", id2) - if sq.isExecuting(idx) { - TaskLog(sq.tasks[idx], "task is executing (unexpected)") - panic("cannot execute an executing task") - } - - sq.tasks[idx].Increment() - sq.execute(idx) -} - -// ReExecute re-executes a task (back to queue from execution) -func (sq *taskQueue) ReExecute(idx int) { - sq.lock() - defer sq.unlock() - - TaskLog(sq.tasks[idx], fmt.Sprintf("-> RE-execute (%d)", sq.tasks[idx].Incarnation)) - - if !sq.isExecuting(idx) { - TaskLog(sq.tasks[idx], "task is not executing (unexpected)") - panic("cannot re-execute a non-executing task") - } + TaskLog(sq.tasks[idx], fmt.Sprintf("-> Execute (%d)", sq.tasks[idx].Incarnation)) sq.tasks[idx].Increment() sq.execute(idx) @@ -177,20 +175,27 @@ func (sq *taskQueue) ReExecute(idx int) { // ValidateLaterTasks marks all tasks after the given 
index as pending validation. // any executing tasks are skipped func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { - sq.lock() - defer sq.unlock() + id := sq.timer.Start("ValidateLaterTasks") + defer sq.timer.End("ValidateLaterTasks", id) for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { + sq.lock() sq.validate(idx) + sq.unlock() } } func (sq *taskQueue) isFinished(idx int) bool { + id := sq.timer.Start("isFinished") + defer sq.timer.End("isFinished", id) + _, ok := sq.finished.Load(idx) return ok && sq.tasks[idx].IsStatus(statusValidated) } func (sq *taskQueue) DependenciesFinished(idx int) bool { + id := sq.timer.Start("DependenciesFinished") + defer sq.timer.End("DependenciesFinished", id) for _, dep := range sq.tasks[idx].Dependencies { if !sq.isFinished(dep) { return false @@ -201,6 +206,8 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { // IsCompleted returns true if all tasks are "finished" func (sq *taskQueue) IsCompleted() bool { + id := sq.timer.Start("IsCompleted") + defer sq.timer.End("IsCompleted", id) if len(*sq.queue) == 0 { for _, t := range sq.tasks { if !sq.isFinished(t.Index) { @@ -214,9 +221,10 @@ func (sq *taskQueue) IsCompleted() bool { } func (sq *taskQueue) pushTask(idx int, taskType TaskType) { + id := sq.timer.Start("pushTask") + defer sq.timer.End("pushTask", id) sq.condMx.Lock() defer sq.condMx.Unlock() - sq.queued.Store(idx, struct{}{}) TaskLog(sq.tasks[idx], fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.tasks[idx].Incarnation)) heap.Push(sq.queue, idx) sq.cond.Broadcast() @@ -224,11 +232,13 @@ func (sq *taskQueue) pushTask(idx int, taskType TaskType) { // ExecuteAll executes all tasks in the queue (called to start processing) func (sq *taskQueue) ExecuteAll() { - sq.lock() - defer sq.unlock() + id := sq.timer.Start("ExecuteAll") + defer sq.timer.End("ExecuteAll", id) for idx := range sq.tasks { + sq.lock() sq.execute(idx) + sq.unlock() } } @@ -251,8 +261,6 @@ func (sq *taskQueue) NextTask() (*TxTask, bool) { idx := heap.Pop(sq.queue).(int) sq.heapMx.Unlock() - defer sq.queued.Delete(idx) - res := sq.tasks[idx] TaskLog(res, fmt.Sprintf("<- POP task (%d)", res.Incarnation)) @@ -267,6 +275,7 @@ func (sq *taskQueue) Close() { defer sq.condMx.Unlock() sq.closed = true sq.cond.Broadcast() + sq.timer.PrintReport() }) } diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index 22686afd8..29696daa5 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -55,14 +55,6 @@ func TestSchedulerQueue(t *testing.T) { queue.FinishExecute(nextTask.Index) assertValidating(t, nextTask) - // Test Execute->ReExecute leads to Execution - queue, tasks = testQueue() - queue.ExecuteAll() - nextTask, ok = queue.NextTask() - assert.True(t, ok) - queue.ReExecute(nextTask.Index) - assertExecuting(t, nextTask) - // Test that validation doesn't happen for executing task queue, tasks = testQueue() queue.ExecuteAll() diff --git a/tasks/timer.go b/tasks/timer.go new file mode 100644 index 000000000..77ea1ca28 --- /dev/null +++ b/tasks/timer.go @@ -0,0 +1,134 @@ +package tasks + +import ( + "fmt" + "github.com/google/uuid" + "sort" + "strings" + "sync" + "time" +) + +type Timer struct { + name string + mx sync.Mutex + reports map[string]*TimerReport + ch chan timeEvt +} + +type timeEvt struct { + start bool + name string + id string + timestamp time.Time +} + +type TimerReport struct { + name string + initial time.Time + starts map[string]time.Time + times []time.Duration +} + +func NewTimer(name string) *Timer { + t := &Timer{ + name: name, + 
reports: make(map[string]*TimerReport), + ch: make(chan timeEvt, 10000), + } + go t.consume() + return t +} + +func (t *Timer) consume() { + for evt := range t.ch { + if evt.start { + if _, ok := t.reports[evt.name]; !ok { + t.reports[evt.name] = &TimerReport{ + starts: make(map[string]time.Time), + times: nil, + } + } + t.reports[evt.name].starts[evt.id] = evt.timestamp + } else { + if rpt, ok := t.reports[evt.name]; ok { + if start, ok := rpt.starts[evt.id]; ok { + rpt.times = append(rpt.times, evt.timestamp.Sub(start)) + } + } + } + } +} + +func (t *Timer) PrintReport() { + t.mx.Lock() + defer t.mx.Unlock() + + var reports []*TimerReport + for name, rpt := range t.reports { + rpt.name = name + reports = append(reports, rpt) + } + + // Sort the slice by the sum of durations + sort.Slice(reports, func(i, j int) bool { + sumI := time.Duration(0) + for _, d := range reports[i].times { + sumI += d + } + + sumJ := time.Duration(0) + for _, d := range reports[j].times { + sumJ += d + } + + return sumI < sumJ + }) + + lines := []string{} + for _, rpt := range reports { + var sum time.Duration + count := len(rpt.times) + minDuration := time.Hour + maxDuration := time.Duration(0) + for _, d := range rpt.times { + sum += d + if d < minDuration { + minDuration = d + } + if d > maxDuration { + maxDuration = d + } + } + avg := sum / time.Duration(count) + lines = append(lines, fmt.Sprintf("%-15s: \tsum=%-15s\tavg=%-15s\tmin=%-15s\tmax=%-15s\tcount=%-15d %s", t.name, sum, avg, minDuration, maxDuration, count, rpt.name)) + } + fmt.Println(strings.Join(lines, "\n")) +} + +func (t *Timer) Start(name string) string { + id := uuid.New().String() + go func() { + t.mx.Lock() + defer t.mx.Unlock() + + if _, ok := t.reports[name]; !ok { + t.reports[name] = &TimerReport{ + starts: make(map[string]time.Time), + times: nil, + } + } + t.reports[name].starts[id] = time.Now() + }() + return id +} + +func (t *Timer) End(name string, id string) { + t.mx.Lock() + defer t.mx.Unlock() + if rpt, ok := t.reports[name]; ok { + if start, ok := rpt.starts[id]; ok { + rpt.times = append(rpt.times, time.Now().Sub(start)) + } + } +} diff --git a/tasks/utils.go b/tasks/utils.go index 7650062c4..23ea6773a 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -1,7 +1,6 @@ package tasks import ( - "context" "crypto/sha256" "fmt" "github.com/cosmos/cosmos-sdk/store/multiversion" @@ -9,8 +8,6 @@ import ( "github.com/tendermint/tendermint/abci/types" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" - - "time" ) // TODO: remove after things work @@ -19,27 +16,6 @@ func TaskLog(task *TxTask, msg string) { //fmt.Println(fmt.Sprintf("%d: Task(%d/%s/%d):\t%s", time.Now().UnixMicro(), task.Index, task.status, task.Incarnation, msg)) } -// TODO: remove after things work -// waitWithMsg prints a message every 1s if not cancelled (for hang situations) -func waitWithMsg(msg string, handlers ...func()) context.CancelFunc { - goctx, cancel := context.WithCancel(context.Background()) - tick := time.NewTicker(1 * time.Second) - go func() { - for { - select { - case <-goctx.Done(): - return - case <-tick.C: - fmt.Println(msg) - for _, h := range handlers { - h() - } - } - } - }() - return cancel -} - func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *TxTask) (sdk.Context, trace.Span) { spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) if task != nil { From 4b7cfc145bf16a821d3952214431cb0bbb02cb1a Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 30 Nov 2023 15:43:04 -0500 Subject: 
[PATCH 40/65] performance improvements --- tasks/scheduler.go | 6 +- tasks/scheduler_test.go | 2 +- tasks/task_queue.go | 162 +++++++++++++++------------------------ tasks/task_validation.go | 4 + tasks/timer.go | 3 + 5 files changed, 75 insertions(+), 102 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 5b758154a..65877558c 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -129,7 +129,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return results, err } -func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) { +func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) bool { + var result bool switch taskType { case TypeValidation: s.WithTimer("TypeValidation", func() { @@ -144,7 +145,7 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "*** VALIDATED ***") // informs queue that it's complete (any subsequent submission for idx unsets this) queue.FinishTask(t.Index) - return + result = true case statusWaiting: // task should be re-validated (waiting on others) // how can we wait on dependencies? @@ -182,4 +183,5 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "unexpected type") panic("unexpected type") } + return result } diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index a59ab4f20..7d3160f16 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -161,7 +161,7 @@ func TestProcessAll(t *testing.T) { workers: 50, runs: 1, addStores: true, - requests: requestList(1000), + requests: requestList(50), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index e6cb6ed2a..eace060c3 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -1,10 +1,10 @@ package tasks import ( - "container/heap" "fmt" "sort" "sync" + "sync/atomic" ) type TaskType string @@ -39,26 +39,28 @@ type Queue interface { } type taskQueue struct { - lockTimerID string - mx sync.Mutex - condMx sync.Mutex - heapMx sync.Mutex - cond *sync.Cond - once sync.Once - executing map[int]struct{} - finished sync.Map - tasks []*TxTask - queue *taskHeap - timer *Timer - closed bool + lockTimerID string + mx sync.Mutex + condMx sync.Mutex + heapMx sync.Mutex + cond *sync.Cond + once sync.Once + executing sync.Map + finished sync.Map + finishedCount atomic.Int32 + + out chan int + tasks []*TxTask + queue *taskHeap + timer *Timer } func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ - tasks: tasks, - queue: &taskHeap{}, - timer: NewTimer("Queue"), - executing: make(map[int]struct{}), + tasks: tasks, + queue: &taskHeap{}, + timer: NewTimer("Queue"), + out: make(chan int, len(tasks)), } sq.cond = sync.NewCond(&sq.condMx) @@ -74,27 +76,37 @@ func (sq *taskQueue) unlock() { } func (sq *taskQueue) execute(idx int) { - if sq.tasks[idx].SetTaskType(TypeExecution) { - TaskLog(sq.tasks[idx], "-> execute") - sq.finished.Delete(idx) - sq.executing[idx] = struct{}{} + if sq.getTask(idx).SetTaskType(TypeExecution) { + TaskLog(sq.getTask(idx), "-> execute") + + if sq.isFinished(idx) { + sq.finished.Delete(idx) + sq.finishedCount.Add(-1) + } + + sq.executing.Store(idx, struct{}{}) sq.pushTask(idx, TypeExecution) } } +func (sq *taskQueue) getTask(idx int) *TxTask { + 
return sq.tasks[idx] +} + func (sq *taskQueue) validate(idx int) { + task := sq.getTask(idx) if sq.isExecuting(idx) { - TaskLog(sq.tasks[idx], "(skip validating, executing...)") + TaskLog(task, "(skip validating, executing...)") return } - if sq.tasks[idx].SetTaskType(TypeValidation) { - TaskLog(sq.tasks[idx], "-> validate") + if sq.getTask(idx).SetTaskType(TypeValidation) { + TaskLog(task, "-> validate") sq.pushTask(idx, TypeValidation) } } func (sq *taskQueue) isExecuting(idx int) bool { - _, ok := sq.executing[idx] + _, ok := sq.executing.Load(idx) return ok } @@ -103,38 +115,29 @@ func (sq *taskQueue) FinishExecute(idx int) { id := sq.timer.Start("FinishExecute") defer sq.timer.End("FinishExecute", id) - id2 := sq.timer.Start("FinishExecute-LOCK") - sq.lock() - defer func() { - sq.unlock() - sq.timer.End("FinishExecute-LOCK", id2) - }() - - TaskLog(sq.tasks[idx], "-> finish task execute") + TaskLog(sq.getTask(idx), "-> finish task execute") if !sq.isExecuting(idx) { - TaskLog(sq.tasks[idx], "not executing, but trying to finish execute") + TaskLog(sq.getTask(idx), "not executing, but trying to finish execute") panic("not executing, but trying to finish execute") } - delete(sq.executing, idx) + sq.executing.Delete(idx) sq.validate(idx) } // FinishTask marks a task as finished if nothing else queued it // this drives whether the queue thinks everything is done processing func (sq *taskQueue) FinishTask(idx int) { + if sq.isFinished(idx) { + return + } + id := sq.timer.Start("FinishTask") defer sq.timer.End("FinishTask", id) - id2 := sq.timer.Start("FinishTask-LOCK") - sq.lock() - defer func() { - sq.unlock() - sq.timer.End("FinishTask-LOCK", id2) - }() - - TaskLog(sq.tasks[idx], "FinishTask -> task is FINISHED (for now)") + TaskLog(sq.getTask(idx), "FinishTask -> task is FINISHED (for now)") + sq.finishedCount.Add(1) sq.finished.Store(idx, struct{}{}) } @@ -142,11 +145,9 @@ func (sq *taskQueue) FinishTask(idx int) { func (sq *taskQueue) ReValidate(idx int) { id := sq.timer.Start("ReValidate") defer sq.timer.End("ReValidate", id) - sq.lock() - defer sq.unlock() if sq.isExecuting(idx) { - TaskLog(sq.tasks[idx], "task is executing (unexpected)") + TaskLog(sq.getTask(idx), "task is executing (unexpected)") panic("cannot re-validate an executing task") } @@ -154,21 +155,13 @@ func (sq *taskQueue) ReValidate(idx int) { } func (sq *taskQueue) Execute(idx int) { - id := sq.timer.Start("Execute-full") - defer sq.timer.End("Execute-full", id) - id3 := sq.timer.Start("Execute-LOCK") - sq.lock() - defer func() { - sq.unlock() - sq.timer.End("Execute-LOCK", id3) - }() + id := sq.timer.Start("Execute") + defer sq.timer.End("Execute", id) - id2 := sq.timer.Start("Execute-logic") - defer sq.timer.End("Execute-logic", id2) - - TaskLog(sq.tasks[idx], fmt.Sprintf("-> Execute (%d)", sq.tasks[idx].Incarnation)) - - sq.tasks[idx].Increment() + //TODO: might need lock here + task := sq.tasks[idx] + TaskLog(task, fmt.Sprintf("-> Execute (%d)", sq.getTask(idx).Incarnation)) + task.Increment() sq.execute(idx) } @@ -179,9 +172,7 @@ func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { defer sq.timer.End("ValidateLaterTasks", id) for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { - sq.lock() sq.validate(idx) - sq.unlock() } } @@ -190,13 +181,13 @@ func (sq *taskQueue) isFinished(idx int) bool { defer sq.timer.End("isFinished", id) _, ok := sq.finished.Load(idx) - return ok && sq.tasks[idx].IsStatus(statusValidated) + return ok && sq.getTask(idx).IsStatus(statusValidated) } func (sq *taskQueue) 
DependenciesFinished(idx int) bool { id := sq.timer.Start("DependenciesFinished") defer sq.timer.End("DependenciesFinished", id) - for _, dep := range sq.tasks[idx].Dependencies { + for _, dep := range sq.getTask(idx).Dependencies { if !sq.isFinished(dep) { return false } @@ -208,26 +199,15 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { func (sq *taskQueue) IsCompleted() bool { id := sq.timer.Start("IsCompleted") defer sq.timer.End("IsCompleted", id) - if len(*sq.queue) == 0 { - for _, t := range sq.tasks { - if !sq.isFinished(t.Index) { - TaskLog(t, "not finished yet") - return false - } - } - return true - } - return false + fc := sq.finishedCount.Load() + return fc == int32(len(sq.tasks)) } func (sq *taskQueue) pushTask(idx int, taskType TaskType) { id := sq.timer.Start("pushTask") defer sq.timer.End("pushTask", id) - sq.condMx.Lock() - defer sq.condMx.Unlock() - TaskLog(sq.tasks[idx], fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.tasks[idx].Incarnation)) - heap.Push(sq.queue, idx) - sq.cond.Broadcast() + TaskLog(sq.getTask(idx), fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.getTask(idx).Incarnation)) + sq.out <- idx } // ExecuteAll executes all tasks in the queue (called to start processing) @@ -246,35 +226,19 @@ func (sq *taskQueue) ExecuteAll() { // this hangs if no tasks are ready because it's possible a new task might arrive // closing the queue causes NextTask to return false immediately func (sq *taskQueue) NextTask() (*TxTask, bool) { - sq.condMx.Lock() - defer sq.condMx.Unlock() - - for len(*sq.queue) == 0 && !sq.closed { - sq.cond.Wait() - } - - if sq.closed { + idx, open := <-sq.out + if !open { return nil, false } - - sq.heapMx.Lock() - idx := heap.Pop(sq.queue).(int) - sq.heapMx.Unlock() - - res := sq.tasks[idx] - + res := sq.getTask(idx) TaskLog(res, fmt.Sprintf("<- POP task (%d)", res.Incarnation)) - return res, true } // Close closes the queue, causing NextTask to return false. 
func (sq *taskQueue) Close() { sq.once.Do(func() { - sq.condMx.Lock() - defer sq.condMx.Unlock() - sq.closed = true - sq.cond.Broadcast() + close(sq.out) sq.timer.PrintReport() }) } diff --git a/tasks/task_validation.go b/tasks/task_validation.go index 0c8d303d9..601a37dd8 100644 --- a/tasks/task_validation.go +++ b/tasks/task_validation.go @@ -32,6 +32,10 @@ func (s *scheduler) invalidateTask(task *TxTask) { } } +func (s *scheduler) mockValidateTask(ctx sdk.Context, task *TxTask) { + task.SetStatus(statusValidated) +} + func (s *scheduler) validateTask(ctx sdk.Context, task *TxTask) { _, span := s.traceSpan(ctx, "SchedulerValidate", task) defer span.End() diff --git a/tasks/timer.go b/tasks/timer.go index 77ea1ca28..bac767a81 100644 --- a/tasks/timer.go +++ b/tasks/timer.go @@ -100,6 +100,9 @@ func (t *Timer) PrintReport() { maxDuration = d } } + if count == 0 { + continue + } avg := sum / time.Duration(count) lines = append(lines, fmt.Sprintf("%-15s: \tsum=%-15s\tavg=%-15s\tmin=%-15s\tmax=%-15s\tcount=%-15d %s", t.name, sum, avg, minDuration, maxDuration, count, rpt.name)) } From 62b93ad607754b871f93cac59c01e91037cb6fff Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 30 Nov 2023 17:46:15 -0500 Subject: [PATCH 41/65] add moar speed --- tasks/scheduler.go | 122 +++++++++++++++------------------------ tasks/scheduler_test.go | 4 +- tasks/task_queue.go | 60 +------------------ tasks/task_queue_test.go | 21 ------- 4 files changed, 50 insertions(+), 157 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 65877558c..75b9ab252 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -7,7 +7,6 @@ import ( "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/tendermint/tendermint/abci/types" "sync" - "sync/atomic" ) // Scheduler processes tasks concurrently @@ -46,7 +45,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t var results []types.ResponseDeliverTx var err error s.WithTimer("ProcessAll", func() { - pas := s.timer.Start("ProcessAll-Setup") // initialize mutli-version stores s.initMultiVersionStore(ctx) // prefill estimates @@ -67,47 +65,27 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t wg := sync.WaitGroup{} wg.Add(workers) - count := atomic.Int32{} - s.timer.End("ProcessAll-Setup", pas) for i := 0; i < workers; i++ { go func(worker int) { defer wg.Done() for { + if queue.IsCompleted() { + queue.Close() + } - s.WithTimer("IsCompleted()", func() { - if queue.IsCompleted() { - queue.Close() - } - }) - - var task *TxTask - var anyTasks bool - s.WithTimer("NextTask()", func() { - task, anyTasks = queue.NextTask() - }) + task, anyTasks := queue.NextTask() if !anyTasks { return } - s.WithTimer("IsCompleted()", func() { - task.LockTask() - var tt TaskType - var ok bool - s.WithTimer("PopTaskType()", func() { - tt, ok = task.PopTaskType() - }) - if ok { - count.Add(1) - s.WithTimer("processTask()", func() { - s.processTask(ctx, tt, worker, task, queue) - }) - } else { - TaskLog(task, "NONE FOUND...SKIPPING") - } - task.UnlockTask() - }) + // removing this lock creates a lot more tasks + task.LockTask() + if tt, ok := task.PopTaskType(); ok { + s.processTask(ctx, tt, worker, task, queue) + } + task.UnlockTask() } @@ -116,8 +94,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t wg.Wait() - fmt.Println("count", count.Load()) - for _, mv := range s.multiVersionStores { mv.WriteLatestToStore() } @@ -133,51 +109,47 @@ func (s *scheduler) processTask(ctx 
sdk.Context, taskType TaskType, w int, t *Tx var result bool switch taskType { case TypeValidation: - s.WithTimer("TypeValidation", func() { - TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) - - s.validateTask(ctx, t) - - // check the outcome of validation and do things accordingly - switch t.status { - case statusValidated: - // task is possibly finished (can be re-validated by others) - TaskLog(t, "*** VALIDATED ***") - // informs queue that it's complete (any subsequent submission for idx unsets this) - queue.FinishTask(t.Index) - result = true - case statusWaiting: - // task should be re-validated (waiting on others) - // how can we wait on dependencies? - TaskLog(t, "waiting/executed...revalidating") - if queue.DependenciesFinished(t.Index) { - queue.Execute(t.Index) - } - case statusInvalid: - TaskLog(t, "invalid (re-executing, re-validating > tx)") + TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) + + s.validateTask(ctx, t) + + // check the outcome of validation and do things accordingly + switch t.status { + case statusValidated: + // task is possibly finished (can be re-validated by others) + TaskLog(t, "*** VALIDATED ***") + // informs queue that it's complete (any subsequent submission for idx unsets this) + queue.FinishTask(t.Index) + result = true + case statusWaiting: + // task should be re-validated (waiting on others) + // how can we wait on dependencies? + TaskLog(t, "waiting/executed...revalidating") + if queue.DependenciesFinished(t.Index) { queue.Execute(t.Index) - default: - TaskLog(t, "unexpected status") - panic("unexpected status ") } - }) + case statusInvalid: + TaskLog(t, "invalid (re-executing, re-validating > tx)") + queue.Execute(t.Index) + default: + TaskLog(t, "unexpected status") + panic("unexpected status ") + } case TypeExecution: - s.WithTimer("TypeExecution", func() { - t.ResetForExecution() - TaskLog(t, fmt.Sprintf("TypeExecution (worker=%d)", w)) - - s.executeTask(t) - - if t.IsStatus(statusAborted) { - queue.Execute(t.Index) - } else { - TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) - queue.FinishExecute(t.Index) - //TODO: speed this up, too slow to do every time - queue.ValidateLaterTasks(t.Index) - } - }) + t.ResetForExecution() + TaskLog(t, fmt.Sprintf("TypeExecution (worker=%d)", w)) + + s.executeTask(t) + + if t.IsStatus(statusAborted) { + queue.Execute(t.Index) + } else { + TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) + queue.FinishExecute(t.Index) + //TODO: speed this up + queue.ValidateLaterTasks(t.Index) + } default: TaskLog(t, "unexpected type") diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 7d3160f16..291ec4164 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 50, + workers: 500, runs: 1, addStores: true, - requests: requestList(50), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index eace060c3..d9a86b825 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -2,7 +2,6 @@ package tasks import ( "fmt" - "sort" "sync" "sync/atomic" ) @@ -51,16 +50,12 @@ type taskQueue struct { out chan int tasks []*TxTask - queue *taskHeap - 
timer *Timer } func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ tasks: tasks, - queue: &taskHeap{}, - timer: NewTimer("Queue"), - out: make(chan int, len(tasks)), + out: make(chan int, len(tasks)*10), } sq.cond = sync.NewCond(&sq.condMx) @@ -112,8 +107,6 @@ func (sq *taskQueue) isExecuting(idx int) bool { // FinishExecute marks a task as finished executing and transitions directly validation func (sq *taskQueue) FinishExecute(idx int) { - id := sq.timer.Start("FinishExecute") - defer sq.timer.End("FinishExecute", id) TaskLog(sq.getTask(idx), "-> finish task execute") @@ -133,9 +126,6 @@ func (sq *taskQueue) FinishTask(idx int) { return } - id := sq.timer.Start("FinishTask") - defer sq.timer.End("FinishTask", id) - TaskLog(sq.getTask(idx), "FinishTask -> task is FINISHED (for now)") sq.finishedCount.Add(1) sq.finished.Store(idx, struct{}{}) @@ -143,8 +133,6 @@ func (sq *taskQueue) FinishTask(idx int) { // ReValidate re-validates a task (back to queue from validation) func (sq *taskQueue) ReValidate(idx int) { - id := sq.timer.Start("ReValidate") - defer sq.timer.End("ReValidate", id) if sq.isExecuting(idx) { TaskLog(sq.getTask(idx), "task is executing (unexpected)") @@ -155,10 +143,6 @@ func (sq *taskQueue) ReValidate(idx int) { } func (sq *taskQueue) Execute(idx int) { - id := sq.timer.Start("Execute") - defer sq.timer.End("Execute", id) - - //TODO: might need lock here task := sq.tasks[idx] TaskLog(task, fmt.Sprintf("-> Execute (%d)", sq.getTask(idx).Incarnation)) task.Increment() @@ -168,8 +152,6 @@ func (sq *taskQueue) Execute(idx int) { // ValidateLaterTasks marks all tasks after the given index as pending validation. // any executing tasks are skipped func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { - id := sq.timer.Start("ValidateLaterTasks") - defer sq.timer.End("ValidateLaterTasks", id) for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { sq.validate(idx) @@ -177,16 +159,11 @@ func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { } func (sq *taskQueue) isFinished(idx int) bool { - id := sq.timer.Start("isFinished") - defer sq.timer.End("isFinished", id) - _, ok := sq.finished.Load(idx) return ok && sq.getTask(idx).IsStatus(statusValidated) } func (sq *taskQueue) DependenciesFinished(idx int) bool { - id := sq.timer.Start("DependenciesFinished") - defer sq.timer.End("DependenciesFinished", id) for _, dep := range sq.getTask(idx).Dependencies { if !sq.isFinished(dep) { return false @@ -197,24 +174,17 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { // IsCompleted returns true if all tasks are "finished" func (sq *taskQueue) IsCompleted() bool { - id := sq.timer.Start("IsCompleted") - defer sq.timer.End("IsCompleted", id) fc := sq.finishedCount.Load() return fc == int32(len(sq.tasks)) } func (sq *taskQueue) pushTask(idx int, taskType TaskType) { - id := sq.timer.Start("pushTask") - defer sq.timer.End("pushTask", id) TaskLog(sq.getTask(idx), fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.getTask(idx).Incarnation)) sq.out <- idx } // ExecuteAll executes all tasks in the queue (called to start processing) func (sq *taskQueue) ExecuteAll() { - id := sq.timer.Start("ExecuteAll") - defer sq.timer.End("ExecuteAll", id) - for idx := range sq.tasks { sq.lock() sq.execute(idx) @@ -239,33 +209,5 @@ func (sq *taskQueue) NextTask() (*TxTask, bool) { func (sq *taskQueue) Close() { sq.once.Do(func() { close(sq.out) - sq.timer.PrintReport() }) } - -type taskHeap []int - -func (h taskHeap) Len() int { return len(h) } -func (h taskHeap) Less(i, j int) bool { return 
h[i] < h[j] } -func (h taskHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } - -func (h *taskHeap) Push(x interface{}) { - // Check if the integer already exists in the heap - for _, item := range *h { - if item == x.(int) { - return - } - } - // If it doesn't exist, append it - *h = append(*h, x.(int)) - // Sort the heap - sort.Ints(*h) -} - -func (h *taskHeap) Pop() interface{} { - old := *h - n := len(old) - x := old[n-1] - *h = old[0 : n-1] - return x -} diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index 29696daa5..11302def8 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -1,7 +1,6 @@ package tasks import ( - "container/heap" "github.com/stretchr/testify/assert" "testing" ) @@ -85,23 +84,3 @@ func TestSchedulerQueue(t *testing.T) { } assert.True(t, queue.IsCompleted()) } - -func TestTaskHeap(t *testing.T) { - h := &taskHeap{} - heap.Init(h) - - // Test Push - heap.Push(h, 3) - heap.Push(h, 1) - heap.Push(h, 2) - heap.Push(h, 1) // Duplicate, should not be added - - assert.Equal(t, 3, h.Len(), "Heap should contain 3 items") - - // Test Pop - assert.Equal(t, 1, heap.Pop(h), "First pop should return the smallest element") - assert.Equal(t, 2, heap.Pop(h), "Second pop should return the next smallest element") - assert.Equal(t, 3, heap.Pop(h), "Third pop should return the largest element") - - assert.Equal(t, 0, h.Len(), "Heap should be empty after all elements are popped") -} From 3d6d486a33b0f74bfb1c841930cb9d3a7cb25994 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Thu, 30 Nov 2023 18:52:20 -0500 Subject: [PATCH 42/65] fixes --- tasks/scheduler.go | 44 ++++++++++++++++++++++++---------------- tasks/scheduler_test.go | 12 +++++------ tasks/task_queue.go | 11 +--------- tasks/task_queue_test.go | 5 ++++- 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 75b9ab252..1d42fc279 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -19,7 +19,7 @@ type scheduler struct { workers int multiVersionStores map[sdk.StoreKey]multiversion.MultiVersionStore tracingInfo *tracing.Info - allTasks []*TxTask + tasks []*TxTask executeCh chan func() validateCh chan func() timer *Timer @@ -41,27 +41,34 @@ func (s *scheduler) WithTimer(name string, work func()) { s.timer.End(name, id) } +func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) (Queue, int) { + // initialize mutli-version stores + s.initMultiVersionStore(ctx) + // prefill estimates + s.PrefillEstimates(reqs) + tasks := toTasks(ctx, reqs) + s.tasks = tasks + + workers := s.workers + if s.workers < 1 { + workers = len(tasks) + } + + // initialize scheduler queue + queue := NewTaskQueue(tasks) + + // send all tasks to queue + go queue.ExecuteAll() + + return queue, workers +} + func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { var results []types.ResponseDeliverTx var err error s.WithTimer("ProcessAll", func() { - // initialize mutli-version stores - s.initMultiVersionStore(ctx) - // prefill estimates - s.PrefillEstimates(reqs) - tasks := toTasks(ctx, reqs) - s.allTasks = tasks - - workers := s.workers - if s.workers < 1 { - workers = len(tasks) - } - - // initialize scheduler queue - queue := NewTaskQueue(tasks) - // send all tasks to queue - go queue.ExecuteAll() + queue, workers := s.initScheduler(ctx, reqs) wg := sync.WaitGroup{} wg.Add(workers) @@ -82,6 +89,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t // 
removing this lock creates a lot more tasks task.LockTask() + // this safely gets the task type while someone could be editing it if tt, ok := task.PopTaskType(); ok { s.processTask(ctx, tt, worker, task, queue) } @@ -97,7 +105,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t for _, mv := range s.multiVersionStores { mv.WriteLatestToStore() } - results = collectResponses(tasks) + results = collectResponses(s.tasks) err = nil }) s.timer.PrintReport() diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 291ec4164..2bf8c9865 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -127,10 +127,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 500, - runs: 1, + workers: 5, + runs: 100, addStores: true, - requests: requestList(10000), + requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 500, - runs: 1, + workers: 5, + runs: 100, addStores: true, - requests: requestList(10000), + requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index d9a86b825..4d9eb1ed0 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -40,9 +40,6 @@ type Queue interface { type taskQueue struct { lockTimerID string mx sync.Mutex - condMx sync.Mutex - heapMx sync.Mutex - cond *sync.Cond once sync.Once executing sync.Map finished sync.Map @@ -55,10 +52,8 @@ type taskQueue struct { func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ tasks: tasks, - out: make(chan int, len(tasks)*10), + out: make(chan int, len(tasks)*2), } - sq.cond = sync.NewCond(&sq.condMx) - return sq } @@ -90,10 +85,6 @@ func (sq *taskQueue) getTask(idx int) *TxTask { func (sq *taskQueue) validate(idx int) { task := sq.getTask(idx) - if sq.isExecuting(idx) { - TaskLog(task, "(skip validating, executing...)") - return - } if sq.getTask(idx).SetTaskType(TypeValidation) { TaskLog(task, "-> validate") sq.pushTask(idx, TypeValidation) diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index 11302def8..be0a80826 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -43,7 +43,9 @@ func TestSchedulerQueue(t *testing.T) { // Test Close queue.Close() - _, ok = queue.NextTask() + for ok { + nextTask, ok = queue.NextTask() + } assert.False(t, ok) // Test FinishExecute leads to Validation @@ -51,6 +53,7 @@ func TestSchedulerQueue(t *testing.T) { queue.ExecuteAll() nextTask, ok = queue.NextTask() assert.True(t, ok) + nextTask.PopTaskType() queue.FinishExecute(nextTask.Index) assertValidating(t, nextTask) From b008e136f541f92da48ca46461e7b54472094f34 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Fri, 1 Dec 2023 14:00:14 -0500 Subject: [PATCH 43/65] fix closed channel --- tasks/scheduler.go | 43 +++++++++++---- tasks/scheduler_test.go | 12 ++--- tasks/task_queue.go | 114 +++++++++++++++++++++++++++++----------- tasks/timer.go | 24 +-------- tasks/utils.go | 18 +++++++ 5 files changed, 142 insertions(+), 69 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 
1d42fc279..a31cfb60d 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -7,6 +7,7 @@ import ( "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/tendermint/tendermint/abci/types" "sync" + "sync/atomic" ) // Scheduler processes tasks concurrently @@ -66,24 +67,43 @@ func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ( func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { var results []types.ResponseDeliverTx var err error + counter := atomic.Int32{} + s.WithTimer("ProcessAll", func() { queue, workers := s.initScheduler(ctx, reqs) - wg := sync.WaitGroup{} wg.Add(workers) + mx := sync.Mutex{} + activeSet := newSyncSet() + final := atomic.Bool{} for i := 0; i < workers; i++ { go func(worker int) { defer wg.Done() for { - if queue.IsCompleted() { - queue.Close() + if activeSet.Length() == 0 { + mx.Lock() + if queue.IsCompleted() { + if final.Load() { + queue.Close() + } else { + final.Store(true) + queue.ValidateAll() + } + } + mx.Unlock() } + cancel := hangDebug(func() { + fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) + }) task, anyTasks := queue.NextTask() + cancel() + activeSet.Add(worker) if !anyTasks { + activeSet.Delete(worker) return } @@ -91,10 +111,13 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t task.LockTask() // this safely gets the task type while someone could be editing it if tt, ok := task.PopTaskType(); ok { - s.processTask(ctx, tt, worker, task, queue) + counter.Add(1) + if !s.processTask(ctx, tt, worker, task, queue) { + final.Store(false) + } } task.UnlockTask() - + activeSet.Delete(worker) } }(i) @@ -108,13 +131,13 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t results = collectResponses(s.tasks) err = nil }) - s.timer.PrintReport() + //s.timer.PrintReport() + //fmt.Printf("Total Tasks: %d\n", counter.Load()) return results, err } func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *TxTask, queue Queue) bool { - var result bool switch taskType { case TypeValidation: TaskLog(t, fmt.Sprintf("TypeValidation (worker=%d)", w)) @@ -128,7 +151,7 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "*** VALIDATED ***") // informs queue that it's complete (any subsequent submission for idx unsets this) queue.FinishTask(t.Index) - result = true + return true case statusWaiting: // task should be re-validated (waiting on others) // how can we wait on dependencies? 
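For reference, the hangDebug call wrapped around queue.NextTask above is a small watchdog that periodically reports while a blocking call is stuck; its definition lands in tasks/utils.go later in this patch. A stand-alone sketch of the same pattern (not part of the patch; watchdog and the sleep are illustrative stand-ins for hangDebug and the blocking queue read):

    package main

    import (
        "context"
        "fmt"
        "time"
    )

    // watchdog invokes msg once per second until the returned cancel func is called.
    // It mirrors the shape of the hangDebug helper used around queue.NextTask.
    func watchdog(msg func()) context.CancelFunc {
        ctx, cancel := context.WithCancel(context.Background())
        go func() {
            ticker := time.NewTicker(1 * time.Second)
            defer ticker.Stop()
            for {
                select {
                case <-ticker.C:
                    msg()
                case <-ctx.Done():
                    return
                }
            }
        }()
        return cancel
    }

    func main() {
        cancel := watchdog(func() { fmt.Println("still waiting on NextTask...") })
        time.Sleep(2500 * time.Millisecond) // stand-in for a blocking queue read
        cancel()                            // stop the watchdog once the call returns
    }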
@@ -155,13 +178,13 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) - //TODO: speed this up queue.ValidateLaterTasks(t.Index) + //TODO: speed this up } default: TaskLog(t, "unexpected type") panic("unexpected type") } - return result + return false } diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 2bf8c9865..6d49d5745 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -127,10 +127,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 5, - runs: 100, + workers: 500, + runs: 1, addStores: true, - requests: requestList(100), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 5, - runs: 100, + workers: 500, + runs: 10, addStores: true, - requests: requestList(100), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 4d9eb1ed0..8570194ab 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -29,6 +29,8 @@ type Queue interface { FinishExecute(idx int) // FinishTask marks a task as finished (only upon valid). FinishTask(idx int) + // ValidateAll marks all tasks as pending validation. + ValidateAll() // ValidateLaterTasks marks all tasks after the given index as pending validation. ValidateLaterTasks(afterIdx int) // IsCompleted returns true if all tasks have been executed and validated. 
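A rough sketch (not part of the patch) of how the IsCompleted / ValidateAll / Close additions above are meant to be driven together: the first time the queue looks complete a final ValidateAll sweep is scheduled, and only if it still looks complete on the next idle check is the queue actually closed; in the real worker loop a task that fails validation clears the "final" flag again, and the check is guarded by a mutex and an atomic flag. The sketch below is a single-threaded simplification with an illustrative stand-in interface and fake queue:

    package main

    import "fmt"

    // completionQueue is a trimmed, illustrative stand-in for the Queue methods
    // involved in shutdown; it is not the real implementation.
    type completionQueue interface {
        IsCompleted() bool
        ValidateAll()
        Close()
    }

    // maybeFinish mirrors the idle check in the worker loop: the first time the
    // queue looks complete we schedule one final validation sweep; only if it
    // still looks complete afterwards (final already true) do we close it.
    func maybeFinish(q completionQueue, final *bool) {
        if !q.IsCompleted() {
            return
        }
        if *final {
            q.Close()
            return
        }
        *final = true
        q.ValidateAll()
    }

    type fakeQueue struct{ closed bool }

    func (f *fakeQueue) IsCompleted() bool { return true }
    func (f *fakeQueue) ValidateAll()      {}
    func (f *fakeQueue) Close()            { f.closed = true }

    func main() {
        q := &fakeQueue{}
        final := false
        maybeFinish(q, &final)           // first idle check: triggers ValidateAll
        maybeFinish(q, &final)           // still complete, so the queue closes
        fmt.Println("closed:", q.closed) // closed: true
    }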
@@ -38,12 +40,14 @@ type Queue interface { } type taskQueue struct { - lockTimerID string - mx sync.Mutex - once sync.Once - executing sync.Map - finished sync.Map - finishedCount atomic.Int32 + lockTimerID string + qmx sync.RWMutex + mx sync.Mutex + once sync.Once + executing sync.Map + finished *syncSet + queueLen atomic.Int64 + closed bool out chan int tasks []*TxTask @@ -51,8 +55,9 @@ type taskQueue struct { func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ - tasks: tasks, - out: make(chan int, len(tasks)*2), + tasks: tasks, + out: make(chan int, len(tasks)*10), + finished: newSyncSet(), } return sq } @@ -68,12 +73,7 @@ func (sq *taskQueue) unlock() { func (sq *taskQueue) execute(idx int) { if sq.getTask(idx).SetTaskType(TypeExecution) { TaskLog(sq.getTask(idx), "-> execute") - - if sq.isFinished(idx) { - sq.finished.Delete(idx) - sq.finishedCount.Add(-1) - } - + sq.finished.Delete(idx) sq.executing.Store(idx, struct{}{}) sq.pushTask(idx, TypeExecution) } @@ -98,8 +98,7 @@ func (sq *taskQueue) isExecuting(idx int) bool { // FinishExecute marks a task as finished executing and transitions directly validation func (sq *taskQueue) FinishExecute(idx int) { - - TaskLog(sq.getTask(idx), "-> finish task execute") + defer TaskLog(sq.getTask(idx), "-> finished task execute") if !sq.isExecuting(idx) { TaskLog(sq.getTask(idx), "not executing, but trying to finish execute") @@ -113,23 +112,16 @@ func (sq *taskQueue) FinishExecute(idx int) { // FinishTask marks a task as finished if nothing else queued it // this drives whether the queue thinks everything is done processing func (sq *taskQueue) FinishTask(idx int) { - if sq.isFinished(idx) { - return - } - + sq.finished.Add(idx) TaskLog(sq.getTask(idx), "FinishTask -> task is FINISHED (for now)") - sq.finishedCount.Add(1) - sq.finished.Store(idx, struct{}{}) } // ReValidate re-validates a task (back to queue from validation) func (sq *taskQueue) ReValidate(idx int) { - if sq.isExecuting(idx) { TaskLog(sq.getTask(idx), "task is executing (unexpected)") panic("cannot re-validate an executing task") } - sq.validate(idx) } @@ -140,18 +132,22 @@ func (sq *taskQueue) Execute(idx int) { sq.execute(idx) } +func (sq *taskQueue) ValidateAll() { + for idx := 0; idx < len(sq.tasks); idx++ { + sq.validate(idx) + } +} + // ValidateLaterTasks marks all tasks after the given index as pending validation. 
// any executing tasks are skipped func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { - for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { sq.validate(idx) } } func (sq *taskQueue) isFinished(idx int) bool { - _, ok := sq.finished.Load(idx) - return ok && sq.getTask(idx).IsStatus(statusValidated) + return sq.finished.Exists(idx) && sq.getTask(idx).IsStatus(statusValidated) } func (sq *taskQueue) DependenciesFinished(idx int) bool { @@ -165,21 +161,34 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { // IsCompleted returns true if all tasks are "finished" func (sq *taskQueue) IsCompleted() bool { - fc := sq.finishedCount.Load() - return fc == int32(len(sq.tasks)) + queued := sq.queueLen.Load() + if queued > 0 { + return false + } + finished := sq.finished.Length() + tasks := len(sq.tasks) + if finished != tasks { + return false + } + return true } func (sq *taskQueue) pushTask(idx int, taskType TaskType) { TaskLog(sq.getTask(idx), fmt.Sprintf("-> PUSH task (%s/%d)", taskType, sq.getTask(idx).Incarnation)) + sq.queueLen.Add(1) + sq.qmx.RLock() + defer sq.qmx.RUnlock() + if sq.closed { + TaskLog(sq.getTask(idx), "queue is closed") + return + } sq.out <- idx } // ExecuteAll executes all tasks in the queue (called to start processing) func (sq *taskQueue) ExecuteAll() { for idx := range sq.tasks { - sq.lock() sq.execute(idx) - sq.unlock() } } @@ -191,6 +200,7 @@ func (sq *taskQueue) NextTask() (*TxTask, bool) { if !open { return nil, false } + defer sq.queueLen.Add(-1) res := sq.getTask(idx) TaskLog(res, fmt.Sprintf("<- POP task (%d)", res.Incarnation)) return res, true @@ -199,6 +209,48 @@ func (sq *taskQueue) NextTask() (*TxTask, bool) { // Close closes the queue, causing NextTask to return false. func (sq *taskQueue) Close() { sq.once.Do(func() { + sq.qmx.Lock() + defer sq.qmx.Unlock() + sq.closed = true close(sq.out) }) } + +// syncSet is like sync.Map but it supports length +type syncSet struct { + mx sync.Mutex + m map[int]struct{} +} + +func newSyncSet() *syncSet { + return &syncSet{ + m: make(map[int]struct{}), + } +} + +func (ss *syncSet) Add(idx int) { + ss.mx.Lock() + defer ss.mx.Unlock() + ss.m[idx] = struct{}{} +} + +func (ss *syncSet) Delete(idx int) { + ss.mx.Lock() + defer ss.mx.Unlock() + if _, ok := ss.m[idx]; ok { + delete(ss.m, idx) + } +} + +func (ss *syncSet) Length() int { + ss.mx.Lock() + defer ss.mx.Unlock() + return len(ss.m) +} + +func (ss *syncSet) Exists(idx int) bool { + ss.mx.Lock() + defer ss.mx.Unlock() + _, ok := ss.m[idx] + return ok +} diff --git a/tasks/timer.go b/tasks/timer.go index bac767a81..bb8680d48 100644 --- a/tasks/timer.go +++ b/tasks/timer.go @@ -1,8 +1,9 @@ package tasks import ( - "fmt" "github.com/google/uuid" + + "fmt" "sort" "strings" "sync" @@ -36,30 +37,9 @@ func NewTimer(name string) *Timer { reports: make(map[string]*TimerReport), ch: make(chan timeEvt, 10000), } - go t.consume() return t } -func (t *Timer) consume() { - for evt := range t.ch { - if evt.start { - if _, ok := t.reports[evt.name]; !ok { - t.reports[evt.name] = &TimerReport{ - starts: make(map[string]time.Time), - times: nil, - } - } - t.reports[evt.name].starts[evt.id] = evt.timestamp - } else { - if rpt, ok := t.reports[evt.name]; ok { - if start, ok := rpt.starts[evt.id]; ok { - rpt.times = append(rpt.times, evt.timestamp.Sub(start)) - } - } - } - } -} - func (t *Timer) PrintReport() { t.mx.Lock() defer t.mx.Unlock() diff --git a/tasks/utils.go b/tasks/utils.go index 23ea6773a..38da24200 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -1,6 +1,7 @@ 
package tasks import ( + "context" "crypto/sha256" "fmt" "github.com/cosmos/cosmos-sdk/store/multiversion" @@ -8,6 +9,7 @@ import ( "github.com/tendermint/tendermint/abci/types" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "time" ) // TODO: remove after things work @@ -27,6 +29,22 @@ func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *TxTask) (sdk.C return ctx, span } +func hangDebug(msg func()) context.CancelFunc { + ctx, cancel := context.WithCancel(context.Background()) + ticker := time.NewTicker(1 * time.Second) + go func() { + for { + select { + case <-ticker.C: + msg() + case <-ctx.Done(): + return + } + } + }() + return cancel +} + func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*TxTask { res := make([]*TxTask, 0, len(reqs)) for idx, r := range reqs { From 04dd6a1c64412e4d1daa84298a12cce5b7b46728 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Sat, 2 Dec 2023 13:58:29 -0500 Subject: [PATCH 44/65] adjust timer --- tasks/scheduler.go | 8 +------- tasks/scheduler_test.go | 4 ++-- tasks/timer.go | 6 ++++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index a31cfb60d..7c77d0f4d 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -36,12 +36,6 @@ func NewScheduler(workers int, tracingInfo *tracing.Info, deliverTxFunc func(ctx } } -func (s *scheduler) WithTimer(name string, work func()) { - id := s.timer.Start(name) - work() - s.timer.End(name, id) -} - func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) (Queue, int) { // initialize mutli-version stores s.initMultiVersionStore(ctx) @@ -69,7 +63,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t var err error counter := atomic.Int32{} - s.WithTimer("ProcessAll", func() { + WithTimer(s.timer, "ProcessAll", func() { queue, workers := s.initScheduler(ctx, reqs) wg := sync.WaitGroup{} diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 6d49d5745..c886e3a4f 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -158,10 +158,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 500, + workers: 50, runs: 10, addStores: true, - requests: requestList(10000), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/timer.go b/tasks/timer.go index bb8680d48..4c881e36f 100644 --- a/tasks/timer.go +++ b/tasks/timer.go @@ -89,6 +89,12 @@ func (t *Timer) PrintReport() { fmt.Println(strings.Join(lines, "\n")) } +func WithTimer(t *Timer, name string, work func()) { + id := t.Start(name) + work() + t.End(name, id) +} + func (t *Timer) Start(name string) string { id := uuid.New().String() go func() { From 05940d566e4dae662d6289e677a31272786713c9 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Sun, 3 Dec 2023 16:18:01 -0500 Subject: [PATCH 45/65] save off perf improvements --- tasks/int_set.go | 139 ++++++++++++++++++++++++++++++++ tasks/int_set_benchmark_test.go | 99 +++++++++++++++++++++++ tasks/scheduler.go | 13 ++- tasks/scheduler_test.go | 17 +++- tasks/task.go | 85 ++++++++++++++++--- tasks/task_queue.go | 64 ++------------- tasks/utils.go | 30 ++++--- 7 files changed, 355 insertions(+), 92 deletions(-) create mode 100644 tasks/int_set.go create mode 100644 tasks/int_set_benchmark_test.go diff --git 
a/tasks/int_set.go b/tasks/int_set.go new file mode 100644 index 000000000..5a157b680 --- /dev/null +++ b/tasks/int_set.go @@ -0,0 +1,139 @@ +package tasks + +import ( + "sync" + "sync/atomic" +) + +type IntSet interface { + Add(idx int) + Delete(idx int) + Length() int + Exists(idx int) bool +} + +// points to implementation +func newIntSet(size int) IntSet { + return newIntSetSyncMap(size) +} + +// syncSetMap uses a map with a RW Mutex +type intSetMap struct { + mx sync.RWMutex + m map[int]struct{} +} + +func newIntSetMap(size int) IntSet { + return &intSetMap{ + m: make(map[int]struct{}), + } +} + +func (ss *intSetMap) Add(idx int) { + ss.mx.Lock() + defer ss.mx.Unlock() + ss.m[idx] = struct{}{} +} + +func (ss *intSetMap) Delete(idx int) { + if ss.Exists(idx) { + ss.mx.Lock() + defer ss.mx.Unlock() + delete(ss.m, idx) + } +} + +func (ss *intSetMap) Length() int { + ss.mx.RLock() + defer ss.mx.RUnlock() + return len(ss.m) +} + +func (ss *intSetMap) Exists(idx int) bool { + ss.mx.RLock() + defer ss.mx.RUnlock() + _, ok := ss.m[idx] + return ok +} + +// intSetSyncMap uses a sync.Map with a length counter +type intSetSyncMap struct { + m sync.Map + length int32 +} + +func newIntSetSyncMap(size int) IntSet { + return &intSetSyncMap{} +} + +func (ss *intSetSyncMap) Add(idx int) { + _, loaded := ss.m.LoadOrStore(idx, struct{}{}) + if !loaded { + atomic.AddInt32(&ss.length, 1) + } +} + +func (ss *intSetSyncMap) Delete(idx int) { + _, ok := ss.m.Load(idx) + if ok { + ss.m.Delete(idx) + atomic.AddInt32(&ss.length, -1) + } +} + +func (ss *intSetSyncMap) Length() int { + return int(atomic.LoadInt32(&ss.length)) +} + +func (ss *intSetSyncMap) Exists(idx int) bool { + _, ok := ss.m.Load(idx) + return ok +} + +// syncSet holds a set of integers in a thread-safe way. +type intSetByteSlice struct { + locks []sync.RWMutex + state []byte + length int32 +} + +func newIntSetByteSlice(size int) *syncSet { + return &syncSet{ + state: make([]byte, size), + locks: make([]sync.RWMutex, size), + } +} + +func (ss *intSetByteSlice) Add(idx int) { + // First check without locking to reduce contention. + if ss.state[idx] == byte(0) { + ss.locks[idx].Lock() + // Check again to make sure it hasn't changed since acquiring the lock. + if ss.state[idx] == byte(0) { + ss.state[idx] = byte(1) + atomic.AddInt32(&ss.length, 1) + } + ss.locks[idx].Unlock() + } +} + +func (ss *intSetByteSlice) Delete(idx int) { + ss.locks[idx].Lock() + defer ss.locks[idx].Unlock() + + // Check again to make sure it hasn't changed since acquiring the lock. 
+ if ss.state[idx] == byte(1) { + ss.state[idx] = byte(0) + atomic.AddInt32(&ss.length, -1) + } + +} + +func (ss *intSetByteSlice) Length() int { + return int(atomic.LoadInt32(&ss.length)) +} + +func (ss *intSetByteSlice) Exists(idx int) bool { + // Atomic read of a single byte is safe + return ss.state[idx] == byte(1) +} diff --git a/tasks/int_set_benchmark_test.go b/tasks/int_set_benchmark_test.go new file mode 100644 index 000000000..18e05f39c --- /dev/null +++ b/tasks/int_set_benchmark_test.go @@ -0,0 +1,99 @@ +package tasks + +import ( + "math/rand" + "testing" +) + +func intSetImpl(size int) IntSet { + return newSyncSet(size) +} + +func BenchmarkSyncSet_Add(b *testing.B) { + size := 1000 // Assuming a size of 1000 for this example + ss := intSetImpl(size) + b.ResetTimer() + for i := 0; i < b.N; i++ { + ss.Add(i % size) // Loop over the syncSet size to avoid out-of-range panics + } +} + +func BenchmarkSyncSet_Delete(b *testing.B) { + size := 1000 + ss := intSetImpl(size) + // Pre-fill the syncSet to delete from + for i := 0; i < size; i++ { + ss.Add(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + ss.Delete(i % size) // Loop over the syncSet size to avoid out-of-range panics + } +} + +func BenchmarkSyncSet_Length(b *testing.B) { + size := 1000 + ss := intSetImpl(size) + // Pre-fill the syncSet + for i := 0; i < size; i++ { + ss.Add(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ss.Length() + } +} + +func BenchmarkSyncSet_Exists(b *testing.B) { + size := 1000 + ss := intSetImpl(size) + // Pre-fill the syncSet + for i := 0; i < size; i++ { + ss.Add(i) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ss.Exists(i % size) // Loop over the syncSet size to avoid out-of-range panics + } +} + +func BenchmarkSyncSet_Add_Contention(b *testing.B) { + size := 1000 // The size of the syncSet + ss := intSetImpl(size) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + ss.Add(rand.Intn(size)) // Use a random index for contention + } + }) +} + +func BenchmarkSyncSet_Delete_Contention(b *testing.B) { + size := 1000 // The size of the syncSet + ss := intSetImpl(size) + // Pre-fill the syncSet to delete from + for i := 0; i < size; i++ { + ss.Add(i) + } + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + ss.Delete(rand.Intn(size)) // Use a random index for contention + } + }) +} + +func BenchmarkSyncSet_Exists_Contention(b *testing.B) { + size := 1000 // The size of the syncSet + ss := intSetImpl(size) + // Pre-fill the syncSet + for i := 0; i < size; i++ { + ss.Add(i) + } + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + ss.Exists(rand.Intn(size)) // Use a random index for contention + } + }) +} diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 7c77d0f4d..461256f1d 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -69,7 +69,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t wg := sync.WaitGroup{} wg.Add(workers) mx := sync.Mutex{} - activeSet := newSyncSet() + var activeCount int32 final := atomic.Bool{} for i := 0; i < workers; i++ { @@ -77,7 +77,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t defer wg.Done() for { - if activeSet.Length() == 0 { + if atomic.LoadInt32(&activeCount) == 0 { mx.Lock() if queue.IsCompleted() { if final.Load() { @@ -95,9 +95,9 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t }) task, anyTasks := queue.NextTask() cancel() - activeSet.Add(worker) + 
atomic.AddInt32(&activeCount, 1) + if !anyTasks { - activeSet.Delete(worker) return } @@ -111,7 +111,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } } task.UnlockTask() - activeSet.Delete(worker) + atomic.AddInt32(&activeCount, -1) } }(i) @@ -125,7 +125,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t results = collectResponses(s.tasks) err = nil }) - //s.timer.PrintReport() + s.timer.PrintReport() //fmt.Printf("Total Tasks: %d\n", counter.Load()) return results, err @@ -173,7 +173,6 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) queue.ValidateLaterTasks(t.Index) - //TODO: speed this up } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index c886e3a4f..82b045d28 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -4,6 +4,9 @@ import ( "context" "errors" "fmt" + "net/http" + _ "net/http/pprof" + "runtime" "testing" "github.com/stretchr/testify/require" @@ -115,6 +118,12 @@ func TestExplicitOrdering(t *testing.T) { } func TestProcessAll(t *testing.T) { + runtime.SetBlockProfileRate(1) + + go func() { + http.ListenAndServe("localhost:6060", nil) + }() + tests := []struct { name string workers int @@ -128,7 +137,7 @@ func TestProcessAll(t *testing.T) { { name: "Test no overlap txs", workers: 500, - runs: 1, + runs: 10, addStores: true, requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { @@ -158,10 +167,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 50, - runs: 10, + workers: 500, + runs: 1, addStores: true, - requests: requestList(1000), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task.go b/tasks/task.go index 4602a6dea..387c7ca6e 100644 --- a/tasks/task.go +++ b/tasks/task.go @@ -6,6 +6,7 @@ import ( "github.com/cosmos/cosmos-sdk/types/occ" "github.com/tendermint/tendermint/abci/types" "sync" + "sync/atomic" ) type status string @@ -40,6 +41,8 @@ type TxTask struct { Dependencies []int Abort *occ.Abort Index int + Executing byte + Validating byte Incarnation int Request types.RequestDeliverTx Response *types.ResponseDeliverTx @@ -50,6 +53,10 @@ func (dt *TxTask) LockTask() { dt.mx.Lock() } +func (dt *TxTask) TryLockTask() bool { + return dt.mx.TryLock() +} + func (dt *TxTask) UnlockTask() { dt.mx.Unlock() } @@ -61,21 +68,25 @@ func (dt *TxTask) IsStatus(s status) bool { } func (dt *TxTask) SetTaskType(tt TaskType) bool { + // Early check to potentially avoid locking. + if tt == TypeValidation && dt.taskType == TypeNone { + return dt.updateTaskType(tt) + } else if tt == TypeExecution && dt.taskType != TypeExecution { + return dt.updateTaskType(tt) + } + return false +} + +// updateTaskType assumes that an update is likely needed and does the final check within the lock. 
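+// Together with the unlocked pre-check in SetTaskType this is a double-checked locking pattern:
+// the common no-op path avoids taking the write lock, and the re-check under rwMx keeps the
+// actual transition race-free.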
+func (dt *TxTask) updateTaskType(tt TaskType) bool { dt.rwMx.Lock() defer dt.rwMx.Unlock() - switch tt { - case TypeValidation: - if dt.taskType == TypeNone { - TaskLog(dt, "SCHEDULE task VALIDATION") - dt.taskType = tt - return true - } - case TypeExecution: - if dt.taskType != TypeExecution { - TaskLog(dt, "SCHEDULE task EXECUTION") - dt.taskType = tt - return true - } + if tt == TypeValidation && dt.taskType == TypeNone { + dt.taskType = tt + return true + } else if tt == TypeExecution && dt.taskType != TypeExecution { + dt.taskType = tt + return true } return false } @@ -143,3 +154,51 @@ func (dt *TxTask) ResetForExecution() { func (dt *TxTask) Increment() { dt.Incarnation++ } + +// syncSet uses byte slices instead of a map (fastest benchmark) +type syncSet struct { + locks []sync.RWMutex + state []byte + length int32 +} + +func newSyncSet(size int) *syncSet { + return &syncSet{ + state: make([]byte, size), + locks: make([]sync.RWMutex, size), + } +} + +func (ss *syncSet) Add(idx int) { + // First check without locking to reduce contention. + if ss.state[idx] == byte(0) { + ss.locks[idx].Lock() + // Check again to make sure it hasn't changed since acquiring the lock. + if ss.state[idx] == byte(0) { + ss.state[idx] = byte(1) + atomic.AddInt32(&ss.length, 1) + } + ss.locks[idx].Unlock() + } +} + +func (ss *syncSet) Delete(idx int) { + ss.locks[idx].Lock() + defer ss.locks[idx].Unlock() + + // Check again to make sure it hasn't changed since acquiring the lock. + if ss.state[idx] == byte(1) { + ss.state[idx] = byte(0) + atomic.AddInt32(&ss.length, -1) + } + +} + +func (ss *syncSet) Length() int { + return int(atomic.LoadInt32(&ss.length)) +} + +func (ss *syncSet) Exists(idx int) bool { + // Atomic read of a single byte is safe + return ss.state[idx] == byte(1) +} diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 8570194ab..bf1e5ec6c 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -42,10 +42,9 @@ type Queue interface { type taskQueue struct { lockTimerID string qmx sync.RWMutex - mx sync.Mutex once sync.Once - executing sync.Map - finished *syncSet + executing IntSet + finished IntSet queueLen atomic.Int64 closed bool @@ -55,26 +54,19 @@ type taskQueue struct { func NewTaskQueue(tasks []*TxTask) Queue { sq := &taskQueue{ - tasks: tasks, - out: make(chan int, len(tasks)*10), - finished: newSyncSet(), + tasks: tasks, + out: make(chan int, len(tasks)*10), + finished: newIntSet(len(tasks)), // newSyncSetMap(), //(len(tasks)), + executing: newIntSet(len(tasks)), } return sq } -func (sq *taskQueue) lock() { - sq.mx.Lock() -} - -func (sq *taskQueue) unlock() { - sq.mx.Unlock() -} - func (sq *taskQueue) execute(idx int) { if sq.getTask(idx).SetTaskType(TypeExecution) { TaskLog(sq.getTask(idx), "-> execute") sq.finished.Delete(idx) - sq.executing.Store(idx, struct{}{}) + sq.executing.Add(idx) sq.pushTask(idx, TypeExecution) } } @@ -92,8 +84,7 @@ func (sq *taskQueue) validate(idx int) { } func (sq *taskQueue) isExecuting(idx int) bool { - _, ok := sq.executing.Load(idx) - return ok + return sq.executing.Exists(idx) } // FinishExecute marks a task as finished executing and transitions directly validation @@ -215,42 +206,3 @@ func (sq *taskQueue) Close() { close(sq.out) }) } - -// syncSet is like sync.Map but it supports length -type syncSet struct { - mx sync.Mutex - m map[int]struct{} -} - -func newSyncSet() *syncSet { - return &syncSet{ - m: make(map[int]struct{}), - } -} - -func (ss *syncSet) Add(idx int) { - ss.mx.Lock() - defer ss.mx.Unlock() - ss.m[idx] = struct{}{} -} 
- -func (ss *syncSet) Delete(idx int) { - ss.mx.Lock() - defer ss.mx.Unlock() - if _, ok := ss.m[idx]; ok { - delete(ss.m, idx) - } -} - -func (ss *syncSet) Length() int { - ss.mx.Lock() - defer ss.mx.Unlock() - return len(ss.m) -} - -func (ss *syncSet) Exists(idx int) bool { - ss.mx.Lock() - defer ss.mx.Unlock() - _, ok := ss.m[idx] - return ok -} diff --git a/tasks/utils.go b/tasks/utils.go index 38da24200..b6daef985 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -2,12 +2,9 @@ package tasks import ( "context" - "crypto/sha256" - "fmt" "github.com/cosmos/cosmos-sdk/store/multiversion" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/tendermint/tendermint/abci/types" - "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "time" ) @@ -18,15 +15,24 @@ func TaskLog(task *TxTask, msg string) { //fmt.Println(fmt.Sprintf("%d: Task(%d/%s/%d):\t%s", time.Now().UnixMicro(), task.Index, task.status, task.Incarnation, msg)) } -func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *TxTask) (sdk.Context, trace.Span) { - spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) - if task != nil { - span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) - span.SetAttributes(attribute.Int("txIndex", task.Index)) - span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) - } - ctx = ctx.WithTraceSpanContext(spanCtx) - return ctx, span +type Endable interface { + End(options ...trace.SpanEndOption) +} + +type mockEndable struct{} + +func (m *mockEndable) End(options ...trace.SpanEndOption) {} + +func (s *scheduler) traceSpan(ctx sdk.Context, name string, task *TxTask) (sdk.Context, Endable) { + //spanCtx, span := s.tracingInfo.StartWithContext(name, ctx.TraceSpanContext()) + //if task != nil { + // span.SetAttributes(attribute.String("txHash", fmt.Sprintf("%X", sha256.Sum256(task.Request.Tx)))) + // span.SetAttributes(attribute.Int("txIndex", task.Index)) + // span.SetAttributes(attribute.Int("txIncarnation", task.Incarnation)) + //} + //ctx = ctx.WithTraceSpanContext(spanCtx) + //return ctx, span + return ctx, &mockEndable{} } func hangDebug(msg func()) context.CancelFunc { From 6351efb2480ef31cf9525bef80b8d1eff81e4575 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Sun, 3 Dec 2023 18:38:07 -0500 Subject: [PATCH 46/65] add shards --- tasks/scheduler.go | 17 ++++++++-------- tasks/scheduler_test.go | 10 +++++----- tasks/task_queue.go | 43 +++++++++++++++++++++++----------------- tasks/task_queue_test.go | 14 ++++++------- 4 files changed, 46 insertions(+), 38 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 461256f1d..2340098a6 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -50,7 +50,7 @@ func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ( } // initialize scheduler queue - queue := NewTaskQueue(tasks) + queue := NewTaskQueue(tasks, workers) // send all tasks to queue go queue.ExecuteAll() @@ -93,7 +93,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t cancel := hangDebug(func() { fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) }) - task, anyTasks := queue.NextTask() + task, anyTasks := queue.NextTask(worker) cancel() atomic.AddInt32(&activeCount, 1) @@ -101,8 +101,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t return } - // removing this lock creates a lot more tasks - task.LockTask() // this safely gets the task type while someone 
could be editing it if tt, ok := task.PopTaskType(); ok { counter.Add(1) @@ -110,7 +108,6 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t final.Store(false) } } - task.UnlockTask() atomic.AddInt32(&activeCount, -1) } @@ -141,9 +138,9 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx // check the outcome of validation and do things accordingly switch t.status { case statusValidated: - // task is possibly finished (can be re-validated by others) + // task is finished (but can be re-validated by others) TaskLog(t, "*** VALIDATED ***") - // informs queue that it's complete (any subsequent submission for idx unsets this) + // informs queue that it's complete (counts towards overall completion) queue.FinishTask(t.Index) return true case statusWaiting: @@ -152,9 +149,12 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "waiting/executed...revalidating") if queue.DependenciesFinished(t.Index) { queue.Execute(t.Index) + } else { + queue.ReValidate(t.Index) } case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") + queue.ValidateLaterTasks(t.Index) queue.Execute(t.Index) default: TaskLog(t, "unexpected status") @@ -168,11 +168,12 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx s.executeTask(t) if t.IsStatus(statusAborted) { + //TODO ideally this would wait until dependencies are finished queue.Execute(t.Index) } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) - queue.ValidateLaterTasks(t.Index) + //queue.ValidateLaterTasks(t.Index) } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 82b045d28..26e2e1b67 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -136,10 +136,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 500, + workers: 50, runs: 10, addStores: true, - requests: requestList(10000), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -167,10 +167,10 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 500, - runs: 1, + workers: 50, + runs: 100, addStores: true, - requests: requestList(10000), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) diff --git a/tasks/task_queue.go b/tasks/task_queue.go index bf1e5ec6c..0b479f0a3 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -16,7 +16,7 @@ const ( type Queue interface { // NextTask returns the next task to be executed, or nil if the queue is closed. - NextTask() (*TxTask, bool) + NextTask(workerID int) (*TxTask, bool) // Close closes the queue, causing NextTask to return false. Close() // ExecuteAll executes all tasks in the queue. 
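The change to NextTask(workerID int) above pairs with the per-worker shard channels added below: pushTask routes a task index to shards[idx % workers], so workers stop contending on a single shared channel, at the cost that task i can only ever be served to worker i % workers. A minimal stand-alone sketch of that routing (not part of the patch; names are illustrative):

    package main

    import "fmt"

    // shardedQueue routes task indexes to per-worker channels, mirroring the
    // idx % workers routing used by pushTask / NextTask(workerID).
    type shardedQueue struct {
        shards []chan int
    }

    func newShardedQueue(workers, capacity int) *shardedQueue {
        shards := make([]chan int, workers)
        for i := range shards {
            shards[i] = make(chan int, capacity)
        }
        return &shardedQueue{shards: shards}
    }

    func (q *shardedQueue) push(idx int)        { q.shards[idx%len(q.shards)] <- idx }
    func (q *shardedQueue) next(worker int) int { return <-q.shards[worker] }

    func main() {
        q := newShardedQueue(4, 8)
        for idx := 0; idx < 8; idx++ {
            q.push(idx)
        }
        // worker 1 only ever sees indexes where idx % 4 == 1
        fmt.Println(q.next(1), q.next(1)) // prints: 1 5
    }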
@@ -47,15 +47,20 @@ type taskQueue struct { finished IntSet queueLen atomic.Int64 closed bool - - out chan int - tasks []*TxTask + workers int + shards []chan int + tasks []*TxTask } -func NewTaskQueue(tasks []*TxTask) Queue { +func NewTaskQueue(tasks []*TxTask, workers int) Queue { + shards := make([]chan int, 0, workers) + for i := 0; i < workers; i++ { + shards = append(shards, make(chan int, len(tasks)*2)) + } sq := &taskQueue{ + workers: workers, tasks: tasks, - out: make(chan int, len(tasks)*10), + shards: shards, finished: newIntSet(len(tasks)), // newSyncSetMap(), //(len(tasks)), executing: newIntSet(len(tasks)), } @@ -91,10 +96,10 @@ func (sq *taskQueue) isExecuting(idx int) bool { func (sq *taskQueue) FinishExecute(idx int) { defer TaskLog(sq.getTask(idx), "-> finished task execute") - if !sq.isExecuting(idx) { - TaskLog(sq.getTask(idx), "not executing, but trying to finish execute") - panic("not executing, but trying to finish execute") - } + //if !sq.isExecuting(idx) { + // TaskLog(sq.getTask(idx), "not executing, but trying to finish execute") + // panic("not executing, but trying to finish execute") + //} sq.executing.Delete(idx) sq.validate(idx) @@ -109,10 +114,10 @@ func (sq *taskQueue) FinishTask(idx int) { // ReValidate re-validates a task (back to queue from validation) func (sq *taskQueue) ReValidate(idx int) { - if sq.isExecuting(idx) { - TaskLog(sq.getTask(idx), "task is executing (unexpected)") - panic("cannot re-validate an executing task") - } + //if sq.isExecuting(idx) { + // TaskLog(sq.getTask(idx), "task is executing (unexpected)") + // panic("cannot re-validate an executing task") + //} sq.validate(idx) } @@ -173,7 +178,7 @@ func (sq *taskQueue) pushTask(idx int, taskType TaskType) { TaskLog(sq.getTask(idx), "queue is closed") return } - sq.out <- idx + sq.shards[idx%sq.workers] <- idx } // ExecuteAll executes all tasks in the queue (called to start processing) @@ -186,8 +191,8 @@ func (sq *taskQueue) ExecuteAll() { // NextTask returns the next task to be executed, or nil if the queue is closed. 
// this hangs if no tasks are ready because it's possible a new task might arrive // closing the queue causes NextTask to return false immediately -func (sq *taskQueue) NextTask() (*TxTask, bool) { - idx, open := <-sq.out +func (sq *taskQueue) NextTask(workerID int) (*TxTask, bool) { + idx, open := <-sq.shards[workerID] if !open { return nil, false } @@ -203,6 +208,8 @@ func (sq *taskQueue) Close() { sq.qmx.Lock() defer sq.qmx.Unlock() sq.closed = true - close(sq.out) + for _, shard := range sq.shards { + close(shard) + } }) } diff --git a/tasks/task_queue_test.go b/tasks/task_queue_test.go index be0a80826..c66a674b8 100644 --- a/tasks/task_queue_test.go +++ b/tasks/task_queue_test.go @@ -24,7 +24,7 @@ func assertValidating(t *testing.T, task *TxTask) { func testQueue() (Queue, []*TxTask) { tasks := generateTasks(10) - return NewTaskQueue(tasks), tasks + return NewTaskQueue(tasks, 1), tasks } func TestSchedulerQueue(t *testing.T) { @@ -37,21 +37,21 @@ func TestSchedulerQueue(t *testing.T) { } // Test NextTask - nextTask, ok := queue.NextTask() + nextTask, ok := queue.NextTask(0) assert.True(t, ok) assert.Equal(t, tasks[0], nextTask) // Test Close queue.Close() for ok { - nextTask, ok = queue.NextTask() + nextTask, ok = queue.NextTask(0) } assert.False(t, ok) // Test FinishExecute leads to Validation queue, tasks = testQueue() queue.ExecuteAll() - nextTask, ok = queue.NextTask() + nextTask, ok = queue.NextTask(0) assert.True(t, ok) nextTask.PopTaskType() queue.FinishExecute(nextTask.Index) @@ -61,7 +61,7 @@ func TestSchedulerQueue(t *testing.T) { queue, tasks = testQueue() queue.ExecuteAll() queue.ValidateLaterTasks(-1) - nextTask, ok = queue.NextTask() + nextTask, ok = queue.NextTask(0) assert.True(t, ok) assertExecuting(t, nextTask) // still executing @@ -69,7 +69,7 @@ func TestSchedulerQueue(t *testing.T) { queue, tasks = testQueue() queue.ExecuteAll() queue.ValidateLaterTasks(-1) - nextTask, ok = queue.NextTask() + nextTask, ok = queue.NextTask(0) assert.True(t, ok) assertExecuting(t, nextTask) @@ -79,7 +79,7 @@ func TestSchedulerQueue(t *testing.T) { for idx, task := range tasks { task.SetStatus(statusValidated) - queue.NextTask() + queue.NextTask(0) queue.FinishTask(idx) if idx == len(tasks)-1 { queue.Close() From d4d8c1e309a19e8f5ab97c65d78f5661a5522e01 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 09:23:04 -0600 Subject: [PATCH 47/65] Improve invalidate writeset (#369) ## Describe your changes and provide context ## Testing performed to validate your change --- store/multiversion/mvkv_test.go | 8 ------ store/multiversion/store.go | 49 ++++++++++++++------------------ store/multiversion/store_test.go | 6 ---- 3 files changed, 22 insertions(+), 41 deletions(-) diff --git a/store/multiversion/mvkv_test.go b/store/multiversion/mvkv_test.go index 44304fd50..ff4ab9da9 100644 --- a/store/multiversion/mvkv_test.go +++ b/store/multiversion/mvkv_test.go @@ -148,10 +148,6 @@ func TestVersionIndexedStoreWrite(t *testing.T) { "key3": []byte("value3"), }) - require.False(t, mvs.Has(3, []byte("key1"))) - require.False(t, mvs.Has(3, []byte("key2"))) - require.True(t, mvs.Has(3, []byte("key3"))) - // write some keys vis.Set([]byte("key1"), []byte("value1")) vis.Set([]byte("key2"), []byte("value2")) @@ -175,10 +171,6 @@ func TestVersionIndexedStoreWriteEstimates(t *testing.T) { "key3": []byte("value3"), }) - require.False(t, mvs.Has(3, []byte("key1"))) - require.False(t, mvs.Has(3, []byte("key2"))) - require.True(t, mvs.Has(3, []byte("key3"))) - // write some keys 
vis.Set([]byte("key1"), []byte("value1")) vis.Set([]byte("key2"), []byte("value2")) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 16b0e626b..031b12936 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -14,7 +14,6 @@ import ( type MultiVersionStore interface { GetLatest(key []byte) (value MultiVersionValueItem) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionValueItem) - Has(index int, key []byte) bool WriteLatestToStore() SetWriteset(index int, incarnation int, writeset WriteSet) InvalidateWriteset(index int, incarnation int) @@ -46,6 +45,8 @@ type Store struct { txReadSets *sync.Map // map of tx index -> readset ReadSet txIterateSets *sync.Map // map of tx index -> iterateset Iterateset + txEstimateFlags *sync.Map + parentStore types.KVStore } @@ -55,6 +56,7 @@ func NewMultiVersionStore(parentStore types.KVStore) *Store { txWritesetKeys: &sync.Map{}, txReadSets: &sync.Map{}, txIterateSets: &sync.Map{}, + txEstimateFlags: &sync.Map{}, parentStore: parentStore, } } @@ -76,6 +78,13 @@ func (s *Store) GetLatest(key []byte) (value MultiVersionValueItem) { if !found { return nil // this is possible IF there is are writeset that are then removed for that key } + txIndex := latestVal.Index() + // check against estimate map + _, estimateFound := s.txEstimateFlags.Load(txIndex) + if estimateFound { + // it shouldnt be an issue to have a new item instead of modifying existing? + return NewEstimateItem(txIndex, latestVal.Incarnation()) + } return latestVal } @@ -92,23 +101,17 @@ func (s *Store) GetLatestBeforeIndex(index int, key []byte) (value MultiVersionV if !found { return nil } + txIndex := val.Index() + // check against estimate map + _, estimateFound := s.txEstimateFlags.Load(txIndex) + if estimateFound { + // it shouldnt be an issue to have a new item instead of modifying existing? + return NewEstimateItem(txIndex, val.Incarnation()) + } // found a value prior to the passed in index, return that value (could be estimate OR deleted, but it is a definitive value) return val } -// Has implements MultiVersionStore. It checks if the key exists in the multiversion store at or before the specified index. 
-func (s *Store) Has(index int, key []byte) bool { - - keyString := string(key) - mvVal, found := s.multiVersionMap.Load(keyString) - // if the key doesn't exist in the overall map, return nil - if !found { - return false // this is okay because the caller of this will THEN need to access the parent store to verify that the key doesnt exist there - } - _, foundVal := mvVal.(MultiVersionValue).GetLatestBeforeIndex(index) - return foundVal -} - func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { writeset := make(map[string][]byte) if newWriteSet != nil { @@ -157,23 +160,14 @@ func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { mvVal.Set(index, incarnation, value) } } - sort.Strings(writeSetKeys) // TODO: if we're sorting here anyways, maybe we just put it into a btree instead of a slice + sort.Strings(writeSetKeys) // TODO: if we're sorting here anyways, maybe we just put it into a btree instead of a slice + s.txEstimateFlags.Delete(index) // remove estimate flag if it exists s.txWritesetKeys.Store(index, writeSetKeys) } -// InvalidateWriteset iterates over the keys for the given index and incarnation writeset and replaces with ESTIMATEs +// InvalidateWriteset updates the estimateFlags to indicate the writeset is out of date func (s *Store) InvalidateWriteset(index int, incarnation int) { - keysAny, found := s.txWritesetKeys.Load(index) - if !found { - return - } - keys := keysAny.([]string) - for _, key := range keys { - // invalidate all of the writeset items - is this suboptimal? - we could potentially do concurrently if slow because locking is on an item specific level - val, _ := s.multiVersionMap.LoadOrStore(key, NewMultiVersionItem()) - val.(MultiVersionValue).SetEstimate(index, incarnation) - } - // we leave the writeset in place because we'll need it for key removal later if/when we replace with a new writeset + s.txEstimateFlags.Store(index, struct{}{}) // set estimate flag } // SetEstimatedWriteset is used to directly write estimates instead of writing a writeset and later invalidating @@ -190,6 +184,7 @@ func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset WriteS mvVal.(MultiVersionValue).SetEstimate(index, incarnation) } sort.Strings(writeSetKeys) + s.txEstimateFlags.Store(index, struct{}{}) // set estimate flag s.txWritesetKeys.Store(index, writeSetKeys) } diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index ae0f3afda..b2f367f4c 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -46,11 +46,6 @@ func TestMultiVersionStore(t *testing.T) { }) require.True(t, store.GetLatestBeforeIndex(5, []byte("key1")).IsEstimate()) require.Equal(t, []byte("value4"), store.GetLatestBeforeIndex(7, []byte("key1")).Value()) - - // Test Has - require.True(t, store.Has(2, []byte("key1"))) - require.False(t, store.Has(0, []byte("key1"))) - require.False(t, store.Has(5, []byte("key4"))) } func TestMultiVersionStoreHasLaterValue(t *testing.T) { @@ -69,7 +64,6 @@ func TestMultiVersionStoreKeyDNE(t *testing.T) { require.Nil(t, store.GetLatest([]byte("key1"))) require.Nil(t, store.GetLatestBeforeIndex(0, []byte("key1"))) - require.False(t, store.Has(0, []byte("key1"))) } func TestMultiVersionStoreWriteToParent(t *testing.T) { From 466e81d7b07572f56706cee07e79ff95ba668862 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 09:29:04 -0600 Subject: [PATCH 48/65] Improve invalidate writeset (#370) ## Describe your changes and provide context ## Testing performed to 
validate your change --- store/multiversion/store.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 031b12936..3be2ab4f3 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -384,6 +384,11 @@ func (s *Store) ValidateTransactionState(index int) (bool, []int) { } func (s *Store) WriteLatestToStore() { + // we expect the estimateFlags to be empty + s.txEstimateFlags.Range(func(key, value interface{}) bool { + panic("estimate flags should be empty when writing to parent store") + return false + }) // sort the keys keys := []string{} s.multiVersionMap.Range(func(key, value interface{}) bool { From 4265efbfc9924aec34be21a7b5a5a36aa6ede4d1 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 10:11:13 -0600 Subject: [PATCH 49/65] replace loadAndDelete with load --- store/multiversion/store.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 3be2ab4f3..47f52348c 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -119,7 +119,7 @@ func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { writeset = newWriteSet } // if there is already a writeset existing, we should remove that fully - oldKeys, loaded := s.txWritesetKeys.LoadAndDelete(index) + oldKeys, loaded := s.txWritesetKeys.Load(index) if loaded { keys := oldKeys.([]string) // we need to delete all of the keys in the writeset from the multiversion store @@ -387,7 +387,6 @@ func (s *Store) WriteLatestToStore() { // we expect the estimateFlags to be empty s.txEstimateFlags.Range(func(key, value interface{}) bool { panic("estimate flags should be empty when writing to parent store") - return false }) // sort the keys keys := []string{} From 6c51897af394e98cd7835d32fca0a627fdca4daf Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 13:12:58 -0600 Subject: [PATCH 50/65] use slice pointer instead of slice --- store/multiversion/store.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 47f52348c..806fd971f 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -121,9 +121,9 @@ func (s *Store) removeOldWriteset(index int, newWriteSet WriteSet) { // if there is already a writeset existing, we should remove that fully oldKeys, loaded := s.txWritesetKeys.Load(index) if loaded { - keys := oldKeys.([]string) + keys := oldKeys.(*[]string) // we need to delete all of the keys in the writeset from the multiversion store - for _, key := range keys { + for _, key := range *keys { // small optimization to check if the new writeset is going to write this key, if so, we can leave it behind if _, ok := writeset[key]; ok { // we don't need to remove this key because it will be overwritten anyways - saves the operation of removing + rebalancing underlying btree @@ -162,7 +162,7 @@ func (s *Store) SetWriteset(index int, incarnation int, writeset WriteSet) { } sort.Strings(writeSetKeys) // TODO: if we're sorting here anyways, maybe we just put it into a btree instead of a slice s.txEstimateFlags.Delete(index) // remove estimate flag if it exists - s.txWritesetKeys.Store(index, writeSetKeys) + s.txWritesetKeys.Store(index, &writeSetKeys) } // InvalidateWriteset updates the estimateFlags to indicate the writeset is out of date @@ -185,7 +185,7 @@ func (s *Store) SetEstimatedWriteset(index int, incarnation int, writeset 
WriteS } sort.Strings(writeSetKeys) s.txEstimateFlags.Store(index, struct{}{}) // set estimate flag - s.txWritesetKeys.Store(index, writeSetKeys) + s.txWritesetKeys.Store(index, &writeSetKeys) } // GetAllWritesetKeys implements MultiVersionStore. @@ -194,8 +194,8 @@ func (s *Store) GetAllWritesetKeys() map[int][]string { // TODO: is this safe? s.txWritesetKeys.Range(func(key, value interface{}) bool { index := key.(int) - keys := value.([]string) - writesetKeys[index] = keys + keys := value.(*[]string) + writesetKeys[index] = *keys return true }) From 998f98f853f6532b7208b49803b57c60e5b10a01 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 13:14:18 -0600 Subject: [PATCH 51/65] fix --- store/multiversion/store.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/store/multiversion/store.go b/store/multiversion/store.go index 806fd971f..04ebf54ad 100644 --- a/store/multiversion/store.go +++ b/store/multiversion/store.go @@ -244,9 +244,9 @@ func (s *Store) CollectIteratorItems(index int) *db.MemDB { if !found { continue } - indexedWriteset := writesetAny.([]string) + indexedWriteset := writesetAny.(*[]string) // TODO: do we want to exclude keys out of the range or just let the iterator handle it? - for _, key := range indexedWriteset { + for _, key := range *indexedWriteset { // TODO: inefficient because (logn) for each key + rebalancing? maybe theres a better way to add to a tree to reduce rebalancing overhead sortedItems.Set([]byte(key), []byte{}) } From f843e1090266585e50e056d44d47270a2f67e338 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 14:33:57 -0500 Subject: [PATCH 52/65] update intset --- tasks/int_set.go | 2 +- tasks/int_set_benchmark_test.go | 2 +- tasks/scheduler.go | 16 ++++++---------- tasks/scheduler_test.go | 6 +++--- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/tasks/int_set.go b/tasks/int_set.go index 5a157b680..29d772066 100644 --- a/tasks/int_set.go +++ b/tasks/int_set.go @@ -14,7 +14,7 @@ type IntSet interface { // points to implementation func newIntSet(size int) IntSet { - return newIntSetSyncMap(size) + return newIntSetByteSlice(size) } // syncSetMap uses a map with a RW Mutex diff --git a/tasks/int_set_benchmark_test.go b/tasks/int_set_benchmark_test.go index 18e05f39c..bf21e16a1 100644 --- a/tasks/int_set_benchmark_test.go +++ b/tasks/int_set_benchmark_test.go @@ -6,7 +6,7 @@ import ( ) func intSetImpl(size int) IntSet { - return newSyncSet(size) + return newIntSetByteSlice(size) } func BenchmarkSyncSet_Add(b *testing.B) { diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 2340098a6..78303ab32 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -78,23 +78,22 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t for { if atomic.LoadInt32(&activeCount) == 0 { - mx.Lock() - if queue.IsCompleted() { + if queue.IsCompleted() && mx.TryLock() { if final.Load() { queue.Close() } else { final.Store(true) queue.ValidateAll() } + mx.Unlock() } - mx.Unlock() } - cancel := hangDebug(func() { - fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) - }) + //cancel := hangDebug(func() { + // fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) + //}) task, anyTasks := queue.NextTask(worker) - cancel() + //cancel() atomic.AddInt32(&activeCount, 1) if !anyTasks { @@ -149,8 +148,6 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "waiting/executed...revalidating") if 
queue.DependenciesFinished(t.Index) { queue.Execute(t.Index) - } else { - queue.ReValidate(t.Index) } case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") @@ -173,7 +170,6 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) - //queue.ValidateLaterTasks(t.Index) } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 26e2e1b67..f93c1b8ab 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -136,10 +136,10 @@ func TestProcessAll(t *testing.T) { }{ { name: "Test no overlap txs", - workers: 50, - runs: 10, + workers: 20, + runs: 100, addStores: true, - requests: requestList(1000), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) From fa61b8d401367936f703166eb1e1901afe9f6ad5 Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Mon, 4 Dec 2023 13:34:30 -0600 Subject: [PATCH 53/65] update unit --- store/multiversion/store_test.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/store/multiversion/store_test.go b/store/multiversion/store_test.go index b2f367f4c..0aaa2158a 100644 --- a/store/multiversion/store_test.go +++ b/store/multiversion/store_test.go @@ -93,16 +93,6 @@ func TestMultiVersionStoreWriteToParent(t *testing.T) { require.Equal(t, []byte("value3"), parentKVStore.Get([]byte("key2"))) require.False(t, parentKVStore.Has([]byte("key3"))) require.False(t, parentKVStore.Has([]byte("key4"))) - - // verify no-op if mvs contains ESTIMATE - mvs.SetEstimatedWriteset(1, 2, map[string][]byte{ - "key1": []byte("value1"), - "key3": nil, - "key4": nil, - "key5": nil, - }) - mvs.WriteLatestToStore() - require.False(t, parentKVStore.Has([]byte("key5"))) } func TestMultiVersionStoreWritesetSetAndInvalidate(t *testing.T) { From d76f6b640fbd0dcdeb4d8f149c6e4d379f8496c7 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 14:50:43 -0500 Subject: [PATCH 54/65] revalidate situation --- tasks/scheduler.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 78303ab32..923ebc588 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -89,11 +89,11 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } } - //cancel := hangDebug(func() { - // fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) - //}) + cancel := hangDebug(func() { + fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) + }) task, anyTasks := queue.NextTask(worker) - //cancel() + cancel() atomic.AddInt32(&activeCount, 1) if !anyTasks { @@ -148,6 +148,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx TaskLog(t, "waiting/executed...revalidating") if queue.DependenciesFinished(t.Index) { queue.Execute(t.Index) + } else { + queue.ReValidate(t.Index) } case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") From 617ca971902fa116eaab4c4e5a8e906a17f065c4 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 15:15:20 -0500 Subject: [PATCH 55/65] notify children --- tasks/int_set.go | 10 ++++++++++ tasks/scheduler.go | 10 ++++------ tasks/scheduler_test.go | 8 ++++---- tasks/task.go | 7 ++++--- tasks/task_queue.go | 
23 +++++++++++++++++++++-- tasks/task_validation.go | 2 +- tasks/utils.go | 7 +++++-- 7 files changed, 49 insertions(+), 18 deletions(-) diff --git a/tasks/int_set.go b/tasks/int_set.go index 29d772066..b736eff46 100644 --- a/tasks/int_set.go +++ b/tasks/int_set.go @@ -35,6 +35,16 @@ func (ss *intSetMap) Add(idx int) { ss.m[idx] = struct{}{} } +func (ss *intSetMap) List() []int { + ss.mx.RLock() + defer ss.mx.RUnlock() + list := make([]int, 0, len(ss.m)) + for k := range ss.m { + list = append(list, k) + } + return list +} + func (ss *intSetMap) Delete(idx int) { if ss.Exists(idx) { ss.mx.Lock() diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 923ebc588..e021607e8 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -146,11 +146,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx // task should be re-validated (waiting on others) // how can we wait on dependencies? TaskLog(t, "waiting/executed...revalidating") - if queue.DependenciesFinished(t.Index) { - queue.Execute(t.Index) - } else { - queue.ReValidate(t.Index) - } + queue.AddDependentToParents(t.Index) + case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") queue.ValidateLaterTasks(t.Index) @@ -168,7 +165,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx if t.IsStatus(statusAborted) { //TODO ideally this would wait until dependencies are finished - queue.Execute(t.Index) + t.Parents = []int{t.Abort.DependentTxIdx} + queue.AddDependentToParents(t.Index) } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index f93c1b8ab..46b71a2bd 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -137,9 +137,9 @@ func TestProcessAll(t *testing.T) { { name: "Test no overlap txs", workers: 20, - runs: 100, + runs: 10, addStores: true, - requests: requestList(10000), + requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) @@ -167,8 +167,8 @@ func TestProcessAll(t *testing.T) { }, { name: "Test every tx accesses same key", - workers: 50, - runs: 100, + workers: 20, + runs: 10, addStores: true, requests: requestList(1000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { diff --git a/tasks/task.go b/tasks/task.go index 387c7ca6e..f8419d910 100644 --- a/tasks/task.go +++ b/tasks/task.go @@ -38,7 +38,8 @@ type TxTask struct { taskType TaskType status status ExecutionID string - Dependencies []int + Parents []int + Dependents *intSetMap Abort *occ.Abort Index int Executing byte @@ -136,7 +137,7 @@ func (dt *TxTask) Reset() { dt.Response = nil dt.Abort = nil dt.AbortCh = nil - dt.Dependencies = nil + dt.Parents = nil dt.VersionStores = nil } @@ -147,7 +148,7 @@ func (dt *TxTask) ResetForExecution() { dt.Response = nil dt.Abort = nil dt.AbortCh = nil - dt.Dependencies = nil + dt.Parents = nil dt.VersionStores = nil } diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 0b479f0a3..907b44d38 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -2,6 +2,7 @@ package tasks import ( "fmt" + "sort" "sync" "sync/atomic" ) @@ -15,6 +16,8 @@ const ( ) type Queue interface { + // AddDependentToParents adds a dependent to the parents + AddDependentToParents(idx int) // 
NextTask returns the next task to be executed, or nil if the queue is closed. NextTask(workerID int) (*TxTask, bool) // Close closes the queue, causing NextTask to return false. @@ -94,12 +97,21 @@ func (sq *taskQueue) isExecuting(idx int) bool { // FinishExecute marks a task as finished executing and transitions directly validation func (sq *taskQueue) FinishExecute(idx int) { - defer TaskLog(sq.getTask(idx), "-> finished task execute") + t := sq.getTask(idx) + defer TaskLog(t, "-> finished task execute") //if !sq.isExecuting(idx) { // TaskLog(sq.getTask(idx), "not executing, but trying to finish execute") // panic("not executing, but trying to finish execute") //} + //TODO: optimize + if t.Dependents.Length() > 0 { + dependentTasks := t.Dependents.List() + sort.Ints(dependentTasks) + for _, d := range dependentTasks { + sq.execute(d) + } + } sq.executing.Delete(idx) sq.validate(idx) @@ -147,7 +159,7 @@ func (sq *taskQueue) isFinished(idx int) bool { } func (sq *taskQueue) DependenciesFinished(idx int) bool { - for _, dep := range sq.getTask(idx).Dependencies { + for _, dep := range sq.getTask(idx).Parents { if !sq.isFinished(dep) { return false } @@ -155,6 +167,13 @@ func (sq *taskQueue) DependenciesFinished(idx int) bool { return true } +func (sq *taskQueue) AddDependentToParents(idx int) { + parents := sq.getTask(idx).Parents + for _, p := range parents { + sq.getTask(p).Dependents.Add(idx) + } +} + // IsCompleted returns true if all tasks are "finished" func (sq *taskQueue) IsCompleted() bool { queued := sq.queueLen.Load() diff --git a/tasks/task_validation.go b/tasks/task_validation.go index 601a37dd8..7a644cdcd 100644 --- a/tasks/task_validation.go +++ b/tasks/task_validation.go @@ -41,7 +41,7 @@ func (s *scheduler) validateTask(ctx sdk.Context, task *TxTask) { defer span.End() valid, conflicts := s.findConflicts(task) - task.Dependencies = conflicts + task.Parents = conflicts if !valid { s.invalidateTask(task) diff --git a/tasks/utils.go b/tasks/utils.go index b6daef985..fd4592f28 100644 --- a/tasks/utils.go +++ b/tasks/utils.go @@ -57,8 +57,11 @@ func toTasks(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) []*TxTask { res = append(res, &TxTask{ Request: r.Request, Index: idx, - Ctx: ctx, - status: statusPending, + Dependents: &intSetMap{ + m: make(map[int]struct{}), + }, + Ctx: ctx, + status: statusPending, }) } return res From 0af9bab1a9ba2727206d28d691ac7322d40ca110 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 15:34:03 -0500 Subject: [PATCH 56/65] disable hangDebug --- tasks/scheduler.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index e021607e8..67d28e23d 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -89,11 +89,11 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } } - cancel := hangDebug(func() { - fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) - }) + //cancel := hangDebug(func() { + // fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) + //}) task, anyTasks := queue.NextTask(worker) - cancel() + //cancel() atomic.AddInt32(&activeCount, 1) if !anyTasks { From 33edd8331bfa5b7ab7b26b7c2310780322919116 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 16:04:24 -0500 Subject: [PATCH 57/65] fix validate-after-no-execute case --- tasks/task_validation.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tasks/task_validation.go b/tasks/task_validation.go index 7a644cdcd..6c45cb0d5 
100644 --- a/tasks/task_validation.go +++ b/tasks/task_validation.go @@ -40,6 +40,11 @@ func (s *scheduler) validateTask(ctx sdk.Context, task *TxTask) { _, span := s.traceSpan(ctx, "SchedulerValidate", task) defer span.End() + if task.Response == nil { + task.SetStatus(statusInvalid) + return + } + valid, conflicts := s.findConflicts(task) task.Parents = conflicts From 16d646592c4d43bfdd46d0154dd6cf15b83555c6 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 16:30:06 -0500 Subject: [PATCH 58/65] handle empty txs --- tasks/scheduler.go | 19 +++++++++++++------ tasks/scheduler_test.go | 18 ++++++++++++++++-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 67d28e23d..e322a3acc 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -59,6 +59,10 @@ func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ( } func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { + if len(reqs) == 0 { + return []types.ResponseDeliverTx{}, nil + } + var results []types.ResponseDeliverTx var err error counter := atomic.Int32{} @@ -89,11 +93,11 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } } - //cancel := hangDebug(func() { - // fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) - //}) + cancel := hangDebug(func() { + fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) + }) task, anyTasks := queue.NextTask(worker) - //cancel() + cancel() atomic.AddInt32(&activeCount, 1) if !anyTasks { @@ -146,7 +150,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx // task should be re-validated (waiting on others) // how can we wait on dependencies? 
TaskLog(t, "waiting/executed...revalidating") - queue.AddDependentToParents(t.Index) + //queue.AddDependentToParents(t.Index) + queue.Execute(t.Index) case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") @@ -166,10 +171,12 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx if t.IsStatus(statusAborted) { //TODO ideally this would wait until dependencies are finished t.Parents = []int{t.Abort.DependentTxIdx} - queue.AddDependentToParents(t.Index) + //queue.AddDependentToParents(t.Index) + queue.Execute(t.Index) } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) + queue.ValidateLaterTasks(t.Index) } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 46b71a2bd..f9b54f5a9 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -134,6 +134,20 @@ func TestProcessAll(t *testing.T) { expectedErr error assertions func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) }{ + { + name: "Test zero txs does not hang", + workers: 20, + runs: 10, + addStores: true, + requests: requestList(0), + deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { + panic("should not deliver") + }, + assertions: func(t *testing.T, ctx sdk.Context, res []types.ResponseDeliverTx) { + require.Len(t, res, 0) + }, + expectedErr: nil, + }, { name: "Test no overlap txs", workers: 20, @@ -168,9 +182,9 @@ func TestProcessAll(t *testing.T) { { name: "Test every tx accesses same key", workers: 20, - runs: 10, + runs: 100, addStores: true, - requests: requestList(1000), + requests: requestList(10000), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) From 28333f510d5602bfc4bd278474fc500121feabb5 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 16:37:19 -0500 Subject: [PATCH 59/65] put the parent notify back --- tasks/scheduler.go | 9 +++------ tasks/scheduler_test.go | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index e322a3acc..4266a9ed2 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -60,7 +60,7 @@ func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ( func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { if len(reqs) == 0 { - return []types.ResponseDeliverTx{}, nil + return nil, nil } var results []types.ResponseDeliverTx @@ -150,8 +150,7 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx // task should be re-validated (waiting on others) // how can we wait on dependencies? 
TaskLog(t, "waiting/executed...revalidating") - //queue.AddDependentToParents(t.Index) - queue.Execute(t.Index) + queue.AddDependentToParents(t.Index) case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") @@ -171,12 +170,10 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx if t.IsStatus(statusAborted) { //TODO ideally this would wait until dependencies are finished t.Parents = []int{t.Abort.DependentTxIdx} - //queue.AddDependentToParents(t.Index) - queue.Execute(t.Index) + queue.AddDependentToParents(t.Index) } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) - queue.ValidateLaterTasks(t.Index) } default: diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index f9b54f5a9..3c22df38b 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -184,7 +184,7 @@ func TestProcessAll(t *testing.T) { workers: 20, runs: 100, addStores: true, - requests: requestList(10000), + requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { // all txs read and write to the same key to maximize conflicts kv := ctx.MultiStore().GetKVStore(testStoreKey) From 1109797bb055d526e1ae48896b9323cc5354710c Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 16:37:56 -0500 Subject: [PATCH 60/65] nil -> empty slice --- tasks/scheduler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 4266a9ed2..33316788e 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -60,7 +60,7 @@ func (s *scheduler) initScheduler(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ( func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]types.ResponseDeliverTx, error) { if len(reqs) == 0 { - return nil, nil + return []types.ResponseDeliverTx{}, nil } var results []types.ResponseDeliverTx From a3d4f5e41d9c3d2371de6bee42ca763a92705a56 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 17:50:10 -0500 Subject: [PATCH 61/65] add mx.lock --- tasks/scheduler.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 33316788e..2eeb606b1 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -81,17 +81,19 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t defer wg.Done() for { + + mx.Lock() if atomic.LoadInt32(&activeCount) == 0 { - if queue.IsCompleted() && mx.TryLock() { + if queue.IsCompleted() { if final.Load() { queue.Close() } else { final.Store(true) queue.ValidateAll() } - mx.Unlock() } } + mx.Unlock() cancel := hangDebug(func() { fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) @@ -125,7 +127,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t results = collectResponses(s.tasks) err = nil }) - s.timer.PrintReport() + //s.timer.PrintReport() //fmt.Printf("Total Tasks: %d\n", counter.Load()) return results, err From 41045e35bdb478176bed07f19937bb3c0f38e248 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 17:51:25 -0500 Subject: [PATCH 62/65] mx.lock --- tasks/scheduler.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 2eeb606b1..edd5785a5 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -82,8 +82,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs 
[]*sdk.DeliverTxEntry) ([]t for { - mx.Lock() if atomic.LoadInt32(&activeCount) == 0 { + mx.Lock() if queue.IsCompleted() { if final.Load() { queue.Close() @@ -92,8 +92,8 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t queue.ValidateAll() } } + mx.Unlock() } - mx.Unlock() cancel := hangDebug(func() { fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) From 3f6b95383fe2d8b64a53a51455a75c26218f14e9 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Mon, 4 Dec 2023 17:58:10 -0500 Subject: [PATCH 63/65] add debug log --- tasks/scheduler.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index edd5785a5..1184a9ea3 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -6,6 +6,7 @@ import ( sdk "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/tendermint/tendermint/abci/types" + "strings" "sync" "sync/atomic" ) @@ -96,6 +97,14 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t } cancel := hangDebug(func() { + if worker == 0 && !queue.IsCompleted() { + // produce a report of tasks mapped by status + var lines []string + for _, t := range s.tasks { + lines = append(lines, fmt.Sprintf("Task(idx=%d, status=%s, incarnation=%d):\t%s", t.Index, t.status, t.Incarnation, "status")) + } + fmt.Println(strings.Join(lines, "\n")) + } fmt.Printf("worker=%d, completed=%v\n", worker, queue.IsCompleted()) }) task, anyTasks := queue.NextTask(worker) From cad9b98b7f646761dd115c7223afaadf6b8d010d Mon Sep 17 00:00:00 2001 From: Uday Patil Date: Tue, 5 Dec 2023 10:20:41 -0600 Subject: [PATCH 64/65] add height to log --- tasks/scheduler.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index 1184a9ea3..db9d811f2 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -2,13 +2,14 @@ package tasks import ( "fmt" + "strings" + "sync" + "sync/atomic" + "github.com/cosmos/cosmos-sdk/store/multiversion" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/utils/tracing" "github.com/tendermint/tendermint/abci/types" - "strings" - "sync" - "sync/atomic" ) // Scheduler processes tasks concurrently @@ -98,6 +99,7 @@ func (s *scheduler) ProcessAll(ctx sdk.Context, reqs []*sdk.DeliverTxEntry) ([]t cancel := hangDebug(func() { if worker == 0 && !queue.IsCompleted() { + fmt.Printf("Logging tasks for height %d \n", ctx.BlockHeight()) // produce a report of tasks mapped by status var lines []string for _, t := range s.tasks { From a33813b6e0bb562507e8a9507cebbf23d4e69128 Mon Sep 17 00:00:00 2001 From: Steven Landers Date: Tue, 5 Dec 2023 11:28:15 -0500 Subject: [PATCH 65/65] fix hang --- tasks/scheduler.go | 16 +++++++++++----- tasks/scheduler_test.go | 2 +- tasks/task.go | 6 ++++++ tasks/task_queue.go | 10 +++++++--- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tasks/scheduler.go b/tasks/scheduler.go index db9d811f2..2ecd28744 100644 --- a/tasks/scheduler.go +++ b/tasks/scheduler.go @@ -162,8 +162,8 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx case statusWaiting: // task should be re-validated (waiting on others) // how can we wait on dependencies? 
- TaskLog(t, "waiting/executed...revalidating") - queue.AddDependentToParents(t.Index) + TaskLog(t, "waiting, executing again") + queue.Execute(t.Index) case statusInvalid: TaskLog(t, "invalid (re-executing, re-validating > tx)") @@ -181,9 +181,15 @@ func (s *scheduler) processTask(ctx sdk.Context, taskType TaskType, w int, t *Tx s.executeTask(t) if t.IsStatus(statusAborted) { - //TODO ideally this would wait until dependencies are finished - t.Parents = []int{t.Abort.DependentTxIdx} - queue.AddDependentToParents(t.Index) + parent := s.tasks[t.Abort.DependentTxIdx] + parent.LockTask() + if parent.IsTaskType(TypeExecution) { + t.Parents = []int{t.Abort.DependentTxIdx} + queue.AddDependentToParents(t.Index) + } else { + queue.Execute(t.Index) + } + parent.UnlockTask() } else { TaskLog(t, fmt.Sprintf("FINISHING task EXECUTION (worker=%d, incarnation=%d)", w, t.Incarnation)) queue.FinishExecute(t.Index) diff --git a/tasks/scheduler_test.go b/tasks/scheduler_test.go index 3c22df38b..620fdfd14 100644 --- a/tasks/scheduler_test.go +++ b/tasks/scheduler_test.go @@ -182,7 +182,7 @@ func TestProcessAll(t *testing.T) { { name: "Test every tx accesses same key", workers: 20, - runs: 100, + runs: 1000, addStores: true, requests: requestList(100), deliverTxFunc: func(ctx sdk.Context, req types.RequestDeliverTx) types.ResponseDeliverTx { diff --git a/tasks/task.go b/tasks/task.go index f8419d910..a44113303 100644 --- a/tasks/task.go +++ b/tasks/task.go @@ -92,6 +92,12 @@ func (dt *TxTask) updateTaskType(tt TaskType) bool { return false } +func (dt *TxTask) IsTaskType(tt TaskType) bool { + dt.rwMx.RLock() + defer dt.rwMx.RUnlock() + return dt.taskType == tt +} + func (dt *TxTask) PopTaskType() (TaskType, bool) { dt.rwMx.Lock() defer dt.rwMx.Unlock() diff --git a/tasks/task_queue.go b/tasks/task_queue.go index 907b44d38..0d6c4dbee 100644 --- a/tasks/task_queue.go +++ b/tasks/task_queue.go @@ -85,7 +85,7 @@ func (sq *taskQueue) getTask(idx int) *TxTask { func (sq *taskQueue) validate(idx int) { task := sq.getTask(idx) - if sq.getTask(idx).SetTaskType(TypeValidation) { + if task.SetTaskType(TypeValidation) { TaskLog(task, "-> validate") sq.pushTask(idx, TypeValidation) } @@ -105,6 +105,7 @@ func (sq *taskQueue) FinishExecute(idx int) { // panic("not executing, but trying to finish execute") //} //TODO: optimize + t.LockTask() if t.Dependents.Length() > 0 { dependentTasks := t.Dependents.List() sort.Ints(dependentTasks) @@ -112,6 +113,7 @@ func (sq *taskQueue) FinishExecute(idx int) { sq.execute(d) } } + t.UnlockTask() sq.executing.Delete(idx) sq.validate(idx) @@ -135,8 +137,8 @@ func (sq *taskQueue) ReValidate(idx int) { func (sq *taskQueue) Execute(idx int) { task := sq.tasks[idx] - TaskLog(task, fmt.Sprintf("-> Execute (%d)", sq.getTask(idx).Incarnation)) task.Increment() + TaskLog(task, fmt.Sprintf("-> Execute (%d)", task.Incarnation)) sq.execute(idx) } @@ -150,7 +152,9 @@ func (sq *taskQueue) ValidateAll() { // any executing tasks are skipped func (sq *taskQueue) ValidateLaterTasks(afterIdx int) { for idx := afterIdx + 1; idx < len(sq.tasks); idx++ { - sq.validate(idx) + if !sq.isExecuting(idx) { + sq.validate(idx) + } } }