Skip to content

Commit

Permalink
AVRO-3839: [Rust] Replace lazy_static crate with std::sync::OnceLock (#2461)
Browse files Browse the repository at this point in the history

* AVRO-3839: [Rust] Replace lazy_static crate with std::sync::OnceLock

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-3839: [Rust] Bump minimal Rust version to 1.70.0

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

---------

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
  • Loading branch information
martin-g authored Dec 11, 2023
1 parent bc9dc05 commit 92512b2
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 155 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test-lang-rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
- 'stable'
- 'beta'
- 'nightly'
- '1.65.0' # MSRV
- '1.70.0' # MSRV
target:
- x86_64-unknown-linux-gnu
- wasm32-unknown-unknown
Expand Down Expand Up @@ -273,4 +273,4 @@ jobs:
- name: Build
run: |
set -x
./build.sh test
./build.sh test
2 changes: 1 addition & 1 deletion .github/workflows/test-lang-rust-clippy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
matrix:
rust:
- 'stable'
- '1.65.0' # MSRV
- '1.70.0' # MSRV
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@nightly
Expand Down
2 changes: 0 additions & 2 deletions lang/rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions lang/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,13 @@ authors = ["Apache Avro team <[email protected]>"]
license = "Apache-2.0"
repository = "https://github.com/apache/avro"
edition = "2021"
rust-version = "1.65.0"
rust-version = "1.70.0"
keywords = ["avro", "data", "serialization"]
categories = ["encoding"]
documentation = "https://docs.rs/apache-avro"

# dependencies used by more than one members
[workspace.dependencies]
lazy_static = { default-features = false, version = "1.4.0" }
log = { default-features = false, version = "0.4.20" }
serde = { default-features = false, version = "1.0.193", features = ["derive"] }
serde_json = { default-features = false, version = "1.0.108", features = ["std"] }
Expand Down
1 change: 0 additions & 1 deletion lang/rust/avro/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ bigdecimal = { default-features = false, version = "0.4.2", features = ["std"] }
bzip2 = { default-features = false, version = "0.4.4", optional = true }
crc32fast = { default-features = false, version = "1.3.2", optional = true }
digest = { default-features = false, version = "0.10.7", features = ["core-api"] }
lazy_static = { workspace = true }
libflate = { default-features = false, version = "2.0.0", features = ["std"] }
log = { workspace = true }
num-bigint = { default-features = false, version = "0.4.4" }
Expand Down
2 changes: 1 addition & 1 deletion lang/rust/avro/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()

## Minimal supported Rust version

1.65.0
1.70.0

## License
This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt).
Expand Down
13 changes: 7 additions & 6 deletions lang/rust/avro/src/rabin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,23 @@ use digest::{
consts::U8, core_api::OutputSizeUser, generic_array::GenericArray, FixedOutput,
FixedOutputReset, HashMarker, Output, Reset, Update,
};
use lazy_static::lazy_static;
use std::sync::OnceLock;

const EMPTY: i64 = -4513414715797952619;

lazy_static! {
static ref FPTABLE: [i64; 256] = {
/// Returns the 256-entry lookup table used by the Rabin fingerprint update step.
///
/// The table is stored in a function-local `OnceLock`, so the initialising closure
/// runs at most once and every call hands back a reference to the same array.
// NOTE(review): this span appears to interleave the removed and added sides of a
// diff (two `fp_table[i as usize] = fp` lines, `};` followed by `})`) — confirm
// against the real file before relying on it as compilable code.
fn fp_table() -> &'static [i64; 256] {
static FPTABLE_ONCE: OnceLock<[i64; 256]> = OnceLock::new();
FPTABLE_ONCE.get_or_init(|| {
let mut fp_table: [i64; 256] = [0; 256];
for i in 0..256 {
let mut fp = i;
// Eight rounds per index: logical right shift by one bit (via the u64 cast),
// then XOR with EMPTY when the bit shifted out was 1. `-(fp & 1)` is 0 or -1,
// i.e. an all-zeros or all-ones mask.
for _ in 0..8 {
fp = (fp as u64 >> 1) as i64 ^ (EMPTY & -(fp & 1));
}
fp_table[i as usize] = fp
fp_table[i as usize] = fp;
}
fp_table
};
})
}

/// Implementation of the Rabin fingerprint algorithm using the Digest trait as described in [schema_fingerprints](https://avro.apache.org/docs/current/spec.html#schema_fingerprints).
Expand Down Expand Up @@ -94,7 +95,7 @@ impl Update for Rabin {
// Folds every byte of `data` into the running fingerprint held in `self.result`.
// NOTE(review): the two `^ ...` continuation lines below look like the removed
// (`FPTABLE[...]`) and added (`fp_table()[...]`) sides of one diff line — confirm.
fn update(&mut self, data: &[u8]) {
for b in data {
// Logical right shift of the state by 8 bits (via the u64 cast), XOR-ed with the
// table entry indexed by the low byte of `self.result ^ byte`.
self.result = (self.result as u64 >> 8) as i64
^ FPTABLE[((self.result ^ *b as i64) & 0xff) as usize];
^ fp_table()[((self.result ^ *b as i64) & 0xff) as usize];
}
}
}
Expand Down
48 changes: 32 additions & 16 deletions lang/rust/avro/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
//! Logic for parsing and interacting with schemas in Avro format.
use crate::{error::Error, types, util::MapHelper, AvroResult};
use digest::Digest;
use lazy_static::lazy_static;
use regex_lite::Regex;
use serde::{
ser::{SerializeMap, SerializeSeq},
Expand All @@ -34,19 +33,36 @@ use std::{
hash::Hash,
io::Read,
str::FromStr,
sync::OnceLock,
};
use strum_macros::{EnumDiscriminants, EnumString};

lazy_static! {
static ref ENUM_SYMBOL_NAME_R: Regex = Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap();
/// Returns the shared regex that enum symbol names are matched against.
///
/// The regex is compiled on the first call and cached in a function-local
/// `OnceLock`; later calls return the same instance.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn enum_symbol_name_r() -> &'static Regex {
    const PATTERN: &str = r"^[A-Za-z_][A-Za-z0-9_]*$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| {
        let compiled = Regex::new(PATTERN);
        compiled.unwrap()
    })
}

// An optional namespace (with optional dots) followed by a name without any dots in it.
static ref SCHEMA_NAME_R: Regex =
Regex::new(r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$").unwrap();
/// Returns the shared regex for a schema name: an optional namespace (with
/// optional dots) followed by a name without any dots in it.
///
/// The namespace and the name are exposed as the `namespace` and `name`
/// capture groups. The regex is compiled on the first call and cached in a
/// function-local `OnceLock`.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn schema_name_r() -> &'static Regex {
    const PATTERN: &str = r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$";

    /// Compiles the schema-name pattern; only ever invoked through the cell below.
    fn compile() -> Regex {
        Regex::new(PATTERN).unwrap()
    }

    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(compile)
}

static ref FIELD_NAME_R: Regex = Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap();
/// Returns the shared regex that record field names are matched against.
///
/// Compilation happens once, on the first call; the result lives in a
/// function-local `OnceLock` and is reused afterwards.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn field_name_r() -> &'static Regex {
    static CACHE: OnceLock<Regex> = OnceLock::new();

    let build = || {
        let pattern = r"^[A-Za-z_][A-Za-z0-9_]*$";
        Regex::new(pattern).unwrap()
    };
    CACHE.get_or_init(build)
}

static ref NAMESPACE_R: Regex = Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap();
/// Returns the shared regex that namespaces are matched against.
///
/// The pattern accepts the empty string or one or more identifiers separated
/// by single dots. It is compiled on the first call and cached in a
/// function-local `OnceLock`.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn namespace_r() -> &'static Regex {
    /// Compiles the namespace pattern; only ever invoked through the cell below.
    fn compile() -> Regex {
        let pattern = r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$";
        Regex::new(pattern).unwrap()
    }

    static SLOT: OnceLock<Regex> = OnceLock::new();
    SLOT.get_or_init(compile)
}

/// Represents an Avro schema fingerprint
Expand Down Expand Up @@ -252,9 +268,9 @@ impl Name {
}

fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> {
let caps = SCHEMA_NAME_R
let caps = schema_name_r()
.captures(name)
.ok_or_else(|| Error::InvalidSchemaName(name.to_string(), SCHEMA_NAME_R.as_str()))?;
.ok_or_else(|| Error::InvalidSchemaName(name.to_string(), schema_name_r().as_str()))?;
Ok((
caps["name"].to_string(),
caps.name("namespace").map(|s| s.as_str().to_string()),
Expand Down Expand Up @@ -285,10 +301,10 @@ impl Name {
.filter(|ns| !ns.is_empty());

if let Some(ref ns) = namespace {
if !NAMESPACE_R.is_match(ns) {
if !namespace_r().is_match(ns) {
return Err(Error::InvalidNamespace(
ns.to_string(),
NAMESPACE_R.as_str(),
namespace_r().as_str(),
));
}
}
Expand Down Expand Up @@ -657,7 +673,7 @@ impl RecordField {
) -> AvroResult<Self> {
let name = field.name().ok_or(Error::GetNameFieldFromRecord)?;

if !FIELD_NAME_R.is_match(&name) {
if !field_name_r().is_match(&name) {
return Err(Error::FieldName(name));
}

Expand Down Expand Up @@ -1617,7 +1633,7 @@ impl Parser {
let mut existing_symbols: HashSet<&String> = HashSet::with_capacity(symbols.len());
for symbol in symbols.iter() {
// Ensure enum symbol names match [A-Za-z_][A-Za-z0-9_]*
if !ENUM_SYMBOL_NAME_R.is_match(symbol) {
if !enum_symbol_name_r().is_match(symbol) {
return Err(Error::EnumSymbolName(symbol.to_string()));
}

Expand Down Expand Up @@ -6193,15 +6209,15 @@ mod tests {
let name = Name::new(full_name);
assert!(name.is_err());
let expected =
Error::InvalidSchemaName(full_name.to_string(), SCHEMA_NAME_R.as_str()).to_string();
Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string();
let err = name.map_err(|e| e.to_string()).err().unwrap();
assert_eq!(expected, err);

let full_name = "ns..record1";
let name = Name::new(full_name);
assert!(name.is_err());
let expected =
Error::InvalidSchemaName(full_name.to_string(), SCHEMA_NAME_R.as_str()).to_string();
Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string();
let err = name.map_err(|e| e.to_string()).err().unwrap();
assert_eq!(expected, err);
Ok(())
Expand Down
Loading

0 comments on commit 92512b2

Please sign in to comment.