Skip to content

Commit

Permalink
docs: updated documentation to include diagrams and examples
Browse files Browse the repository at this point in the history
updated formatting
  • Loading branch information
mightyshazam committed Mar 8, 2024
1 parent 93d2af6 commit 1e8de08
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 0 deletions.
80 changes: 80 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,81 @@
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*

- [Quick Start](#quick-start)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

This package is a C# wrapper around [delta-rs](https://github.com/delta-io/delta-rs/tree/rust-v0.17.0).

It uses the [tokio-rs](https://tokio.rs/) runtime to provide asynchronous behavior. This allows the usage of .NET Tasks and async/await to take advantage of the same behavior provided by the underlying Rust library.
This library also takes advantage of the [Apache Arrow](https://github.com/apache/arrow/blob/main/csharp/README.md) C# IPC formats to minimize the amount of copying required to move data between runtimes.

![Using a Rust bridge library with .NET P/Invoke](architecture_simple.png "Using a Rust bridge library with .NET P/Invoke")

The bridge library incorporates delta-rs and [tokio-rs](https://tokio.rs/) as shown in the image below.
![Rust bridge library with tokio](architecture_expanded.png "Rust bridge library with tokio")

## Quick Start

```csharp
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using DeltaLake.Runtime;
using DeltaLake.Table;


/// <summary>
/// Builds a runtime configured with <c>RuntimeOptions.Default</c>.
/// </summary>
/// <returns>A newly constructed <c>DeltaRuntime</c>.</returns>
// NOTE(review): the declared return type is Runtime while the object created is
// DeltaRuntime; confirm which type name the library actually exposes.
public static Runtime CreateRuntime() => new DeltaRuntime(RuntimeOptions.Default);

/// <summary>
/// Calls <c>DeltaTable.CreateAsync</c> for <paramref name="path"/> using a schema
/// with a single non-nullable Int32 field named "test" and an empty configuration.
/// </summary>
/// <param name="runtime">The runtime passed through to <c>DeltaTable.CreateAsync</c>.</param>
/// <param name="path">The table location handed to <c>TableCreateOptions</c>.</param>
/// <param name="cancellationToken">Token forwarded to the create operation.</param>
/// <returns>The task returned by <c>DeltaTable.CreateAsync</c>.</returns>
public static Task<DeltaTable> CreateDeltaTable(
    Runtime runtime,
    string path,
    CancellationToken cancellationToken
)
{
    var builder = new Apache.Arrow.Schema.Builder();
    builder.Field(fb =>
    {
        fb.Name("test");
        fb.DataType(Int32Type.Default);
        fb.Nullable(false);
    });
    var schema = builder.Build();

    // The table location comes from the `path` parameter; the previous sample
    // referenced an undefined `uri` identifier here.
    return DeltaTable.CreateAsync(
        runtime,
        new TableCreateOptions(path, schema)
        {
            Configuration = new Dictionary<string, string?>(),
        },
        cancellationToken);
}

/// <summary>
/// Builds one record batch with a non-nullable Int32 column named "test" holding
/// the integers 0..length-1 and inserts it into <paramref name="table"/> in
/// append mode.
/// </summary>
/// <param name="table">The table that receives the record batch.</param>
/// <param name="cancellationToken">Token forwarded to the insert operation.</param>
/// <returns>A task that completes once the awaited insert has finished.</returns>
public static async Task InsertIntoTable(
    DeltaTable table,
    CancellationToken cancellationToken)
{
    // Number of sample rows written to the "test" column.
    const int length = 10;

    var allocator = new NativeMemoryAllocator();

    // RecordBatch is disposable; release its buffers once the insert has completed.
    using var recordBatch = new RecordBatch.Builder(allocator)
        .Append(
            "test",
            false,
            col => col.Int32(arr => arr.AppendRange(Enumerable.Range(0, length))))
        .Build();

    var options = new InsertOptions
    {
        SaveMode = SaveMode.Append,
    };

    // Use the schema carried by the batch itself so the method is self-contained.
    await table.InsertAsync(
        [recordBatch],
        recordBatch.Schema,
        options,
        cancellationToken);
}
```
Binary file added architecture_expanded.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added architecture_simple.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions examples/local/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using DeltaLake.Runtime;
using DeltaLake.Table;

namespace local;

/// <summary>
/// Example console program: creates a Delta table at the URI given as the first
/// command-line argument and appends one record batch of integers to it.
/// </summary>
public class Program
{
    /// <summary>
    /// Entry point. Expects the table URI as <c>args[0]</c>.
    /// </summary>
    /// <param name="args">Command-line arguments; the first one is the table URI.</param>
    /// <returns>A task that completes when the table has been created and the batch inserted.</returns>
    public static async Task Main(string[] args)
    {
        // Fail with a usage message instead of an IndexOutOfRangeException when no URI is given.
        if (args.Length < 1)
        {
            System.Console.Error.WriteLine("Usage: local <table-uri>");
            System.Environment.ExitCode = 1;
            return;
        }

        // Number of sequential integers (0..length-1) written to the "test" column.
        const int length = 10;

        var uri = args[0];
        using var runtime = new DeltaRuntime(RuntimeOptions.Default);

        // Schema with a single non-nullable Int32 field named "test".
        var builder = new Apache.Arrow.Schema.Builder();
        builder.Field(fb =>
        {
            fb.Name("test");
            fb.DataType(Int32Type.Default);
            fb.Nullable(false);
        });
        var schema = builder.Build();

        var allocator = new NativeMemoryAllocator();

        // RecordBatch is disposable; release its buffers when Main exits.
        using var recordBatch = new RecordBatch.Builder(allocator)
            .Append("test", false, col => col.Int32(arr => arr.AppendRange(Enumerable.Range(0, length))))
            .Build();

        using var table = await DeltaTable.CreateAsync(
            runtime,
            new TableCreateOptions(uri, schema)
            {
                Configuration = new Dictionary<string, string?>
                {
                    ["delta.dataSkippingNumIndexedCols"] = "32",
                    ["delta.setTransactionRetentionDuration"] = null,
                }
            },
            CancellationToken.None);

        var options = new InsertOptions
        {
            SaveMode = SaveMode.Append,
        };
        await table.InsertAsync([recordBatch], schema, options, CancellationToken.None);
    }
}
12 changes: 12 additions & 0 deletions examples/local/local.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the "local" example: a console executable that references the DeltaLake project. -->

<PropertyGroup>
<!-- Build an executable rather than a library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<!-- NOTE(review): Program.cs uses the `string?` annotation; confirm that a nullable context is enabled elsewhere (for example in a shared props file), otherwise the compiler reports CS8632. -->
</PropertyGroup>

<ItemGroup>
<!-- Reference the library from source instead of a published package. -->
<ProjectReference Include="..\..\src\DeltaLake\DeltaLake.csproj" />
</ItemGroup>

</Project>

0 comments on commit 1e8de08

Please sign in to comment.