Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

json-serde: streaming md5 #382

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 39 additions & 20 deletions bench/algorithm/json-serde/1.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,57 @@ import (
)

// main runs the benchmark and reports any failure on stderr with a
// non-zero exit status (previously errors were printed but the process
// still exited 0, hiding failures from callers).
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the json-serde benchmark: it loads <fileName>.json,
// deserializes it, and prints the MD5 hash of the re-serialized JSON —
// once for a single document and once for an array of n copies.
func run() error {
	fileName := "sample"
	n := 10

	if len(os.Args) > 1 {
		fileName = os.Args[1]
	}
	if len(os.Args) > 2 {
		var err error
		n, err = strconv.Atoi(os.Args[2])
		if err != nil {
			return err
		}
	}

	jsonStr, err := ioutil.ReadFile(fileName + ".json")
	if err != nil {
		return err
	}

	// jsonStr is already a []byte; no []byte(...) conversion is needed.
	var data GeoData
	if err := json.Unmarshal(jsonStr, &data); err != nil {
		return err
	}

	bytes, err := json.Marshal(data)
	if err != nil {
		return err
	}
	printHash(bytes)

	// Build the n-element array by re-parsing the input each time, then
	// hash the serialized array as a whole.
	array := make([]GeoData, 0, n)
	for i := 0; i < n; i++ {
		if err := json.Unmarshal(jsonStr, &data); err != nil {
			return err
		}
		array = append(array, data)
	}

	bytes, err = json.Marshal(array)
	if err != nil {
		return err
	}
	printHash(bytes)

	return nil
}

func printHash(json []byte) {
Expand All @@ -41,20 +74,6 @@ type GeoData struct {
Features []Feature `json:"features"`
}

// ToJsonString serializes the array to JSON, returning an empty slice if
// marshalling fails.
func ToJsonString(array []GeoData) []byte {
	encoded, err := json.Marshal(array)
	if err != nil {
		return []byte{}
	}
	return encoded
}

// ToJsonString serializes the document to JSON, returning an empty slice
// if marshalling fails.
func (data *GeoData) ToJsonString() []byte {
	encoded, err := json.Marshal(data)
	if err != nil {
		return []byte{}
	}
	return encoded
}

type Feature struct {
Type string `json:"type"`
Properties Properties `json:"properties"`
Expand Down
59 changes: 39 additions & 20 deletions bench/algorithm/json-serde/2-ffi.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,57 @@ import (
)

// main runs the benchmark and reports any failure on stderr with a
// non-zero exit status (previously errors were printed but the process
// still exited 0, hiding failures from callers).
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the json-serde benchmark: it loads <fileName>.json,
// deserializes it, and prints the MD5 hash of the re-serialized JSON —
// once for a single document and once for an array of n copies.
func run() error {
	fileName := "sample"
	n := 10

	if len(os.Args) > 1 {
		fileName = os.Args[1]
	}
	if len(os.Args) > 2 {
		var err error
		n, err = strconv.Atoi(os.Args[2])
		if err != nil {
			return err
		}
	}

	jsonStr, err := ioutil.ReadFile(fileName + ".json")
	if err != nil {
		return err
	}

	var data GeoData
	if err := json.Unmarshal(jsonStr, &data); err != nil {
		return err
	}

	bytes, err := json.Marshal(data)
	if err != nil {
		return err
	}
	printHash(bytes)

	// Build the n-element array by re-parsing the input each time, then
	// hash the serialized array as a whole.
	array := make([]GeoData, 0, n)
	for i := 0; i < n; i++ {
		if err := json.Unmarshal(jsonStr, &data); err != nil {
			return err
		}
		array = append(array, data)
	}

	bytes, err = json.Marshal(array)
	if err != nil {
		return err
	}
	printHash(bytes)

	return nil
}

func printHash(json []byte) {
Expand All @@ -42,20 +75,6 @@ type GeoData struct {
Features []Feature `json:"features"`
}

// ToJsonString serializes the array to JSON, returning an empty slice if
// marshalling fails.
func ToJsonString(array []GeoData) []byte {
	encoded, err := json.Marshal(array)
	if err != nil {
		return []byte{}
	}
	return encoded
}

// ToJsonString serializes the document to JSON, returning an empty slice
// if marshalling fails.
func (data *GeoData) ToJsonString() []byte {
	encoded, err := json.Marshal(data)
	if err != nil {
		return []byte{}
	}
	return encoded
}

type Feature struct {
Type string `json:"type"`
Properties Properties `json:"properties"`
Expand Down
120 changes: 120 additions & 0 deletions bench/algorithm/json-serde/2-streaming.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
const std = @import("std");
const json = std.json;

const global_allocator = std.heap.c_allocator;

// Entry point: reads <name>.json (first CLI argument, default "sample"),
// then 1) parses it once and prints the MD5 of its re-serialized JSON, and
// 2) parses it n times into an array and prints the MD5 of the array's
// JSON. Hashing streams through StreamingMd5, so no intermediate JSON
// string is materialized.
// NOTE(review): uses the pre-0.11 std.json API (TokenStream/parse/parseFree);
// confirm the toolchain version pinned by this benchmark.
pub fn main() !void {
    const args = try std.process.argsAlloc(global_allocator);
    defer std.process.argsFree(global_allocator, args);

    // Optional first argument: file name without the ".json" extension.
    const file = if (args.len > 1) blk: {
        var file_name = try std.mem.concat(global_allocator, u8, &.{ args[1], ".json" });
        defer global_allocator.free(file_name);
        break :blk try std.fs.cwd().openFile(file_name, .{});
    } else try std.fs.cwd().openFile("sample.json", .{});

    // Optional second argument: number of copies for the array pass.
    // (Defaults to 3 here while the Go variants default to 10 — presumably
    // intentional for the benchmark config; verify against bench harness.)
    var n: usize = 3;
    if (args.len > 2) {
        n = try std.fmt.parseInt(usize, args[2], 10);
    }

    const json_str = try file.readToEndAlloc(global_allocator, std.math.maxInt(u32));
    defer global_allocator.free(json_str);
    {
        // Single-document pass: parse once, stringify straight into the hasher.
        var tokens = json.TokenStream.init(json_str);
        const data = try json.parse(GeoData, &tokens, .{ .allocator = global_allocator });
        defer json.parseFree(GeoData, data, .{ .allocator = global_allocator });

        var md5 = StreamingMd5.init();
        try json.stringify(data, .{}, md5.writer());
        md5.printHash();
    }

    {
        // Array pass: parse the same input n times so each element owns its
        // own allocations, then free every element before the list itself.
        var array = std.ArrayList(GeoData).init(global_allocator);
        defer {
            for (array.items) |data|
                json.parseFree(GeoData, data, .{ .allocator = global_allocator });
            array.deinit();
        }
        var i: usize = 0;
        while (i < n) : (i += 1) {
            var tokens = json.TokenStream.init(json_str);
            const data = try json.parse(GeoData, &tokens, .{ .allocator = global_allocator });
            try array.append(data);
        }

        var md5 = StreamingMd5.init();
        try json.stringify(array.items, .{}, md5.writer());
        md5.printHash();
    }
}

const Md5 = std.crypto.hash.Md5;

// StreamingMd5 adapts an incremental MD5 hasher to the std.io.Writer
// interface, so json.stringify can feed serialized bytes directly into
// the digest without buffering the whole JSON string.
const StreamingMd5 = struct {
    md: Md5,

    pub fn init() StreamingMd5 {
        return .{ .md = Md5.init(.{}) };
    }

    // writer returns a std.io.Writer whose writes update the running digest.
    // The error set is empty because update can never fail.
    pub fn writer(self: *StreamingMd5) std.io.Writer(*StreamingMd5, error{}, StreamingMd5.update) {
        return .{ .context = self };
    }

    // update is the writer sink: it always consumes the full buffer.
    fn update(self: *StreamingMd5, buf: []const u8) error{}!usize {
        self.md.update(buf);
        return buf.len;
    }

    // printHash finalizes the digest and prints it as lowercase hex on
    // stdout; a stdout write error is deliberately ignored (best-effort).
    pub fn printHash(self: *StreamingMd5) void {
        var out: [Md5.digest_length]u8 = undefined;
        self.md.final(&out);
        const stdout = std.io.getStdOut().writer();
        stdout.print("{s}\n", .{std.fmt.fmtSliceHexLower(&out)}) catch {};
    }
};

// GeoJSON document model: a feature collection, its features, and their
// geometry. Field names match the JSON keys so default (de)serialization
// works without tags.
const GeoData = struct {
    type: []const u8,
    features: []const Feature,
};
const Feature = struct {
    type: []const u8,
    properties: Properties,
    geometry: Geometry,
};
const Properties = struct { name: []const u8 };
const Geometry = struct {
    type: []const u8,
    coordinates: []const []const [2]f64,
    // provide a custom jsonStringify
    // - this is only necessary to remove spaces between coordinates array
    // and end up with the correct md5 (compared with 1.js)
    pub fn jsonStringify(
        value: Geometry,
        _: json.StringifyOptions,
        out_stream: anytype,
    ) @TypeOf(out_stream).Error!void {
        // Emit {"type":"<type>","coordinates":[...]} byte-for-byte, with no
        // whitespace, so the streamed MD5 matches the other implementations.
        const typestr =
            \\{"type":"
        ;
        _ = try out_stream.write(typestr);
        _ = try out_stream.write(value.type);
        const coordsstr =
            \\","coordinates":[
        ;
        _ = try out_stream.write(coordsstr);
        for (value.coordinates, 0..) |row, rowi| {
            // Comma-separate rows and points, but never emit a trailing comma.
            if (rowi != 0) _ = try out_stream.write(",");
            _ = try out_stream.write("[");
            for (row, 0..) |col, coli| {
                if (coli != 0) _ = try out_stream.write(",");
                try out_stream.print("[{d},{d}]", .{ col[0], col[1] });
            }
            _ = try out_stream.write("]");
        }
        _ = try out_stream.write("]}");
    }
};
108 changes: 108 additions & 0 deletions bench/algorithm/json-serde/3-streaming.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package main

import (
"crypto/md5"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
)

// main runs the benchmark and reports any failure on stderr with a
// non-zero exit status (previously errors were printed but the process
// still exited 0, hiding failures from callers).
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run reads <fileName>.json, decodes it, and prints the MD5 hash of its
// streamed JSON encoding — once for a single document and once for a
// slice of n copies.
func run() error {
	fileName := "sample"
	n := 10

	if len(os.Args) > 1 {
		fileName = os.Args[1]
	}
	if len(os.Args) > 2 {
		var err error
		n, err = strconv.Atoi(os.Args[2])
		if err != nil {
			return err
		}
	}

	jsonStr, err := ioutil.ReadFile(fileName + ".json")
	if err != nil {
		return err
	}

	// jsonStr is already a []byte; the previous []byte(jsonStr) conversions
	// were no-ops and have been dropped.
	var data GeoData
	if err := json.Unmarshal(jsonStr, &data); err != nil {
		return err
	}
	if err := encodeHash(data); err != nil {
		return err
	}

	// Decode n independent copies (fresh `data` each iteration) and hash
	// the serialized slice as a whole.
	array := make([]GeoData, 0, n)
	for i := 0; i < n; i++ {
		var data GeoData
		if err := json.Unmarshal(jsonStr, &data); err != nil {
			return err
		}
		array = append(array, data)
	}

	return encodeHash(array)
}

// lastNewlineIgnorerWriter forwards writes to w, dropping a single
// trailing newline from each write. json.Encoder appends '\n' after every
// encoded value; stripping it keeps the MD5 identical to json.Marshal.
type lastNewlineIgnorerWriter struct {
	w io.Writer
}

// Write forwards b to the underlying writer, omitting a trailing newline.
// It still reports len(b) bytes consumed so the caller sees a full write.
func (w lastNewlineIgnorerWriter) Write(b []byte) (int, error) {
	// Guard the empty write: the original indexed b[len(b)-1] and panicked
	// on a zero-length slice.
	if len(b) == 0 {
		return 0, nil
	}
	if b[len(b)-1] != '\n' {
		return w.w.Write(b)
	}

	// Report the actual byte count on error, per the io.Writer contract.
	n, err := w.w.Write(b[:len(b)-1])
	if err != nil {
		return n, err
	}
	return len(b), nil
}

// encodeHash streams the JSON encoding of data into an MD5 hasher and
// prints the digest as lowercase hex.
func encodeHash(data any) error {
	hasher := md5.New()
	// The streaming encoder terminates its output with '\n';
	// lastNewlineIgnorerWriter strips it so the digest matches
	// a plain json.Marshal of the same value.
	enc := json.NewEncoder(lastNewlineIgnorerWriter{w: hasher})
	if err := enc.Encode(data); err != nil {
		return err
	}
	fmt.Printf("%x\n", hasher.Sum(nil))
	return nil
}

// GeoData is the top-level GeoJSON document: a feature collection and its
// features. Tags map fields to the lowercase JSON keys.
type GeoData struct {
	Type     string    `json:"type"`
	Features []Feature `json:"features"`
}

// Feature is a single GeoJSON feature with its properties and geometry.
type Feature struct {
	Type       string     `json:"type"`
	Properties Properties `json:"properties"`
	Geometry   Geometry   `json:"geometry"`
}

// Properties carries the feature's name.
type Properties struct {
	Name string `json:"name"`
}

// Geometry holds the coordinate rings: each inner slice is a sequence of
// [longitude, latitude] pairs (order assumed from GeoJSON convention —
// not verified against the sample data).
type Geometry struct {
	Type        string         `json:"type"`
	Coordinates [][][2]float64 `json:"coordinates"`
}
Loading