diff --git a/Cargo.lock b/Cargo.lock index 85ecfab60b9..0e79382b8eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5255,6 +5255,7 @@ dependencies = [ "tokio-timer", "toml", "txn_types", + "unsigned-varint", "url", "uuid", "walkdir", @@ -5735,6 +5736,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +[[package]] +name = "unsigned-varint" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f8d425fafb8cd76bc3f22aace4af471d3156301d7508f2107e98fbeae10bc7f" + [[package]] name = "untrusted" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index 0a2b8a5ec22..202e82325d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -225,6 +225,7 @@ toml = "0.5" txn_types = { path = "components/txn_types", default-features = false } url = "2" uuid = { version = "0.8.1", features = ["serde", "v4"] } +unsigned-varint = "0.7" walkdir = "2" yatp = { git = "https://github.com/tikv/yatp.git", branch = "master" } resource_metering = { path = "components/resource_metering" } diff --git a/components/keys/src/lib.rs b/components/keys/src/lib.rs index 6df0796198c..1ac05ccbdb3 100644 --- a/components/keys/src/lib.rs +++ b/components/keys/src/lib.rs @@ -29,9 +29,6 @@ pub const DATA_PREFIX: u8 = b'z'; pub const DATA_PREFIX_KEY: &[u8] = &[DATA_PREFIX]; pub const DATA_MIN_KEY: &[u8] = &[DATA_PREFIX]; pub const DATA_MAX_KEY: &[u8] = &[DATA_PREFIX + 1]; -pub const DATA_TIDB_RANGES: &[(&[u8], &[u8])] = &[(&[b'm'], &[b'm' + 1]), (&[b't'], &[b't' + 1])]; -pub const DATA_TIDB_RANGES_COMPLEMENT: &[(&[u8], &[u8])] = - &[(&[], &[b'm']), (&[b'm' + 1], &[b't']), (&[b't' + 1], &[])]; // Following keys are all local keys, so the first byte must be 0x01. 
pub const STORE_IDENT_KEY: &[u8] = &[LOCAL_PREFIX, 0x01]; diff --git a/src/storage/key_prefix.rs b/src/storage/key_prefix.rs new file mode 100644 index 00000000000..6c2b452bc73 --- /dev/null +++ b/src/storage/key_prefix.rs @@ -0,0 +1,99 @@ +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. + +//! Key prefix definitions and utilities for API V2. + +pub const TIDB_RANGES: &[(&[u8], &[u8])] = &[(&[b'm'], &[b'm' + 1]), (&[b't'], &[b't' + 1])]; +pub const TIDB_RANGES_COMPLEMENT: &[(&[u8], &[u8])] = + &[(&[], &[b'm']), (&[b'm' + 1], &[b't']), (&[b't' + 1], &[])]; + +pub const RAW_KEY_PREFIX: u8 = b'r'; +pub const TXN_KEY_PREFIX: u8 = b'x'; + +/// Checks whether the key uses the TiDB encoding. +/// +/// Returning true doesn't mean that the key was certainly written by +/// TiDB; it only means that the key matches the definition of a TiDB key +/// in API V2, so it is treated as TiDB data for the sake of +/// compatibility. +pub fn is_tidb_key(key: &[u8]) -> bool { + matches!(KeyPrefix::parse(key).0, KeyPrefix::TiDB) +} + +/// Checks whether the key uses the RawKV encoding. +pub fn is_raw_key(key: &[u8]) -> bool { + matches!(KeyPrefix::parse(key).0, KeyPrefix::Raw { .. }) +} + +/// Checks whether the key uses the TxnKV encoding. +pub fn is_txn_key(key: &[u8]) -> bool { + matches!(KeyPrefix::parse(key).0, KeyPrefix::Txn { .. }) +} + +/// The key prefix in API V2. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum KeyPrefix { + /// Raw key prefix. + Raw { keyspace_id: usize }, + /// Transaction key prefix. + Txn { keyspace_id: usize }, + /// TiDB key prefix. + TiDB, + /// Unrecognised key prefix. + Unknown, +} + +impl KeyPrefix { + /// Parses the key prefix per the API V2 definition; returns the prefix and the user key.
+ pub fn parse(key: &[u8]) -> (KeyPrefix, &[u8]) { + if key.is_empty() { + return (KeyPrefix::Unknown, key); + } + + match key[0] { + RAW_KEY_PREFIX => unsigned_varint::decode::usize(&key[1..]) + .map(|(keyspace_id, rest)| (KeyPrefix::Raw { keyspace_id }, rest)) + .unwrap_or((KeyPrefix::Unknown, key)), + TXN_KEY_PREFIX => unsigned_varint::decode::usize(&key[1..]) + .map(|(keyspace_id, rest)| (KeyPrefix::Txn { keyspace_id }, rest)) + .unwrap_or((KeyPrefix::Unknown, key)), + b'm' | b't' => { + // TiDB prefix is also a part of the user key, so don't strip the prefix. + (KeyPrefix::TiDB, key) + } + _ => (KeyPrefix::Unknown, key), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const KEYSPACE_ID_500: &[u8] = &[244, 3]; + + #[test] + fn test_keyspace_id() { + let mut buf = [0; 10]; + let slice = unsigned_varint::encode::usize(500, &mut buf); + assert_eq!(slice, KEYSPACE_ID_500); + } + + #[test] + fn test_parse() { + assert_eq!( + KeyPrefix::parse(&[RAW_KEY_PREFIX, 244, 3, b'a', b'b']), + (KeyPrefix::Raw { keyspace_id: 500 }, &b"ab"[..]) + ); + assert_eq!( + KeyPrefix::parse(&[TXN_KEY_PREFIX, 244, 3]), + (KeyPrefix::Txn { keyspace_id: 500 }, &b""[..]) + ); + assert_eq!(KeyPrefix::parse(b"t_a"), (KeyPrefix::TiDB, &b"t_a"[..])); + assert_eq!(KeyPrefix::parse(b"m"), (KeyPrefix::TiDB, &b"m"[..])); + assert_eq!(KeyPrefix::parse(b"ot"), (KeyPrefix::Unknown, &b"ot"[..])); + assert_eq!( + KeyPrefix::parse(&[RAW_KEY_PREFIX, 244]), + (KeyPrefix::Unknown, &[RAW_KEY_PREFIX, 244][..]) + ); + } +} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c19f8d3a03d..e5321cd52a6 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -38,6 +38,7 @@ pub mod config; pub mod errors; +pub mod key_prefix; pub mod kv; pub mod lock_manager; pub(crate) mod metrics; @@ -63,6 +64,7 @@ pub use self::{ }; use crate::read_pool::{ReadPool, ReadPoolHandle}; +use crate::storage::key_prefix::TIDB_RANGES_COMPLEMENT; use crate::storage::metrics::CommandKind; use
crate::storage::mvcc::MvccReader; use crate::storage::txn::commands::{RawAtomicStore, RawCompareAndSwap}; @@ -343,7 +345,7 @@ impl Storage { // Check if there are only TiDB data in the engine let snapshot = kv.snapshot(); for cf in DATA_CFS { - for (start, end) in keys::DATA_TIDB_RANGES_COMPLEMENT { + for (start, end) in TIDB_RANGES_COMPLEMENT { let mut unexpected_data_key = None; snapshot.scan_cf( cf,