Skip to content

Commit

Permalink
tool to upload display info to GCS
Browse files Browse the repository at this point in the history
  • Loading branch information
gegaowp committed Mar 3, 2025
1 parent 42fc905 commit 31fd36e
Show file tree
Hide file tree
Showing 9 changed files with 870 additions and 6 deletions.
25 changes: 25 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ members = [
"crates/sui-transactional-test-runner",
"crates/sui-types",
"crates/sui-upgrade-compatibility-transactional-tests",
"crates/sui-upload-display",
"crates/sui-verifier-transactional-tests",
"crates/suins-indexer",
"crates/suiop-cli",
Expand Down
6 changes: 3 additions & 3 deletions crates/sui-indexer-alt-framework/src/ingestion/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use url::Url;
const MAX_TRANSIENT_RETRY_INTERVAL: Duration = Duration::from_secs(60);

#[async_trait::async_trait]
pub(crate) trait IngestionClientTrait: Send + Sync {
pub trait IngestionClientTrait: Send + Sync {
async fn fetch(&self, checkpoint: u64) -> FetchResult;
}

Expand Down Expand Up @@ -53,7 +53,7 @@ pub struct IngestionClient {
}

impl IngestionClient {
pub(crate) fn new_remote(url: Url, metrics: Arc<IndexerMetrics>) -> IngestionResult<Self> {
pub fn new_remote(url: Url, metrics: Arc<IndexerMetrics>) -> IngestionResult<Self> {
let client = Arc::new(RemoteIngestionClient::new(url)?);
Ok(Self::new_impl(client, metrics))
}
Expand Down Expand Up @@ -120,7 +120,7 @@ impl IngestionClient {
/// [FetchError::NotFound] and [FetchError::Permanent] variants.
///
/// - Cancellation of the supplied `cancel` token.
pub(crate) async fn fetch(
pub async fn fetch(
&self,
checkpoint: u64,
cancel: &CancellationToken,
Expand Down
2 changes: 1 addition & 1 deletion crates/sui-indexer-alt-framework/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use tracing::{info, warn};

pub mod handlers;
pub mod ingestion;
pub(crate) mod metrics;
pub mod metrics;
pub mod models;
pub mod pipeline;
pub mod schema;
Expand Down
4 changes: 2 additions & 2 deletions crates/sui-indexer-alt-framework/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ const BATCH_SIZE_BUCKETS: &[f64] = &[
];

#[derive(Clone)]
pub(crate) struct IndexerMetrics {
pub struct IndexerMetrics {
// Statistics related to fetching data from the remote store.
pub total_ingested_checkpoints: IntCounter,
pub total_ingested_transactions: IntCounter,
Expand Down Expand Up @@ -145,7 +145,7 @@ pub(crate) struct CheckpointLagMetricReporter {
}

impl IndexerMetrics {
pub(crate) fn new(registry: &Registry) -> Arc<Self> {
pub fn new(registry: &Registry) -> Arc<Self> {
Arc::new(Self {
total_ingested_checkpoints: register_int_counter_with_registry!(
"indexer_total_ingested_checkpoints",
Expand Down
34 changes: 34 additions & 0 deletions crates/sui-upload-display/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[package]
name = "sui-upload-display"
version = "0.1.0"
edition = "2021"
authors = ["Sui"]
license = "Apache-2.0"
# Internal tool; never published to crates.io.
publish = false

# All versions are inherited from the workspace-level [workspace.dependencies]
# table. Single-key entries use the dotted `dep.workspace = true` form for
# consistency; inline tables are kept only where extra keys (features) are set.
[dependencies]
anyhow.workspace = true
bcs.workspace = true
bytes.workspace = true
clap.workspace = true
csv.workspace = true
dashmap.workspace = true
futures.workspace = true
hex.workspace = true
object_store.workspace = true
prometheus.workspace = true
regex.workspace = true
sui-indexer-alt-framework.workspace = true
sui-types.workspace = true
telemetry-subscribers.workspace = true
tokio = { workspace = true, features = ["full", "macros"] }
tokio-util = { workspace = true, features = ["rt"] }
tracing.workspace = true
url.workspace = true

[dev-dependencies]
tempfile.workspace = true

# Binary entry point for the uploader service.
[[bin]]
name = "sui-upload-display"
path = "src/main.rs"
70 changes: 70 additions & 0 deletions crates/sui-upload-display/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Sui Upload Display

A service that extracts and processes display data from Sui blockchain checkpoints and uploads it to Google Cloud Storage (GCS).

## Features
- Processes Sui blockchain checkpoints in batches
- Extracts display data from transaction events
- Tracks and maintains state across epochs
- Uploads display data to Google Cloud Storage as CSV files
- Supports concurrent processing for improved performance

## Usage

To run the service with all options:

```
cargo run -p sui-upload-display -- \
--gcs-cred-path="/path/to/credentials.json" \
--gcs-display-bucket="bucket-name" \
--remote-url="https://checkpoints.mainnet.sui.io" \
--concurrency-limit=20 \
--batch-size=200
```

With minimal options (using defaults):

```
cargo run -p sui-upload-display -- \
--gcs-cred-path="/path/to/credentials.json" \
--gcs-display-bucket="bucket-name"
```

Get help on available options:

```
cargo run -p sui-upload-display -- --help
```

## Configuration Options

### Command-line Arguments

- `--gcs-cred-path`: Path to Google Cloud Service Account credentials JSON file
- `--gcs-display-bucket`: Name of the Google Cloud Storage bucket to upload files to
- `--remote-url`: URL of the remote checkpoint store or fullnode to fetch checkpoint data from (default: "https://fullnode.mainnet.sui.io:443")
- `--concurrency-limit`: Number of concurrent checkpoints to process (default: 10)
- `--batch-size`: Number of checkpoints to process in one batch (default: 100)

## Implementation Details

The service works as follows:

1. Finds the last processed checkpoint by examining existing files in the GCS bucket
2. Initializes with the epoch data from the latest checkpoint file (if any)
3. Processes batches of checkpoints in the configured batch size
4. For each checkpoint in the batch:
- Fetches checkpoint data from the Sui fullnode
- Extracts display update events
- Stores the updates in memory with their checkpoint and epoch information
5. After processing a batch, updates the in-memory epoch data with the new display entries
6. When an end-of-epoch is detected, uploads the complete display data to GCS
7. Continues to the next batch of checkpoints

The display data is formatted in CSV files with the following columns:
- `object_type`: Type of the object (hex-encoded)
- `id`: Display ID (hex-encoded)
- `version`: Display version
- `bcs`: BCS-encoded display data (hex-encoded)

Files are named with the format `displays_{epoch}_{checkpoint}.csv`.
Loading

0 comments on commit 31fd36e

Please sign in to comment.