From 062eafbd4f02275e4191b71ee5dd748c37764ef2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Wed, 26 Jun 2024 15:05:54 +0200
Subject: [PATCH 1/3] lib/rules: fix a bug in subnet computations

The problem mainly affected subnets not aligned on whole bytes,
but maybe also others.  Reported:
https://lists.nic.cz/hyperkitty/list/knot-resolver-users@lists.nic.cz/message/6P2JPK72WMVLP45TDV42DTACEA2N5NW2/

I'm really sorry about this; no idea why I thought that the simple
multiplication would suffice.
---
 lib/rules/api.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib/rules/api.c b/lib/rules/api.c
index 9dc01a4fe..2725e9a48 100644
--- a/lib/rules/api.c
+++ b/lib/rules/api.c
@@ -842,13 +842,22 @@ static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[3
 	//  - 00 -> beyond the subnet's prefix
 	//  - 10 -> zero bit within the subnet's prefix
 	//  - 11 ->  one bit within the subnet's prefix
-	// Multiplying one uint8_t by 01010101 (in binary) will do interleaving.
 	int i;
 	// Let's hope that compiler optimizes this into something reasonable.
 	for (i = 0; sub_len > 0; ++i, sub_len -= 8) {
-		uint16_t x = a[i] * 85; // interleave by zero bits
-		uint8_t sub_mask = 255 >> (8 - MIN(sub_len, 8));
-		uint16_t r = x | (sub_mask * 85 * 2);
+		// r = a[i] interleaved by 1 bits (with 1s on the higher-value positions)
+		// https://graphics.stanford.edu/~seander/bithacks.html#Interleave64bitOps
+		// but we modify it slightly: no need for the 0x5555 mask (==0b0101010101010101)
+		// or the y-part - we instead just set all odd bits to 1s.
+		uint16_t r = (
+			(a[i] * 0x0101010101010101ULL & 0x8040201008040201ULL)
+				* 0x0102040810204081ULL >> 49
+		        ) | 0xAAAAU/* = 0b1010'1010'1010'1010 */;
+		// now r might just need clipping
+		if (sub_len < 8) {
+			uint16_t mask = 0xFFFFffffU << (2 * (8 - sub_len));
+			r &= mask;
+		}
 		buf[(ssize_t)2*i] = r / 256;
 		buf[(ssize_t)2*i + 1] = r % 256;
 	}

From ea262c7195f6ad0e9394cd8d9c6a45806f59dbbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Wed, 26 Jun 2024 16:07:13 +0200
Subject: [PATCH 2/3] lib/rules nit: missing `static` for a function

---
 lib/rules/api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/rules/api.c b/lib/rules/api.c
index 2725e9a48..c4e254502 100644
--- a/lib/rules/api.c
+++ b/lib/rules/api.c
@@ -865,7 +865,7 @@ static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[3
 }
 
 // Is `a` subnet-prefix of `b`?  (a byte format of subnet_encode())
-bool subnet_is_prefix(uint8_t a, uint8_t b)
+static bool subnet_is_prefix(uint8_t a, uint8_t b)
 {
 	while (true) {
 		if (a >> 6 == 0)

From 0bd1e71b275ef77689440bbf3739da5c25b00f5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Thu, 27 Jun 2024 09:53:11 +0200
Subject: [PATCH 3/3] lib/rules subnet_encode(): improve doc-comments

---
 lib/rules/api.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/lib/rules/api.c b/lib/rules/api.c
index c4e254502..5ecbe29eb 100644
--- a/lib/rules/api.c
+++ b/lib/rules/api.c
@@ -818,16 +818,28 @@ int kr_rule_local_subtree(const knot_dname_t *apex, enum kr_rule_sub_t type,
 }
 
 
-/** Encode a subnet into a (longer) string.
+/** Encode a subnet into a (longer) string.  The result is in `buf` with returned length.
  *
  * The point is to have different encodings for different subnets,
  * with using just byte-length strings (e.g. for ::/1 vs. ::/2).
- * And we need to preserve order: FIXME description
- *  - natural partial order on subnets, one included in another
- *  - partial order on strings, one being a prefix of another
- *  - implies lexicographical order on the encoded strings
+ * You might imagine this as the space of all nodes of a binary trie.
  *
- * Consequently, given a set of subnets, the t
+ * == Key properties ==
+ * We're utilizing the order on the encoded strings.  LMDB uses lexicographical order.
+ * Optimization: the properties should cut down LMDB operation count when searching
+ * for rule sets typical in practice.  Some properties:
+ *  - full address is just a subnet containing only that address (/128 and /32)
+ *  - order of full addresses is kept the same as before encoding
+ *  - ancestor first: if subnet B is included inside subnet A, we get A < B
+ *  - subnet mixing: if two subnets do not share any address, all addresses of one
+ *    of them are ordered before all addresses of the other one
+ *
+ * == The encoding ==
+ * The encoding replaces each address bit by a pair of bits:
+ *  - 00 -> beyond the subnet's prefix
+ *  - 10 -> zero bit within the subnet's prefix
+ *  - 11 ->  one bit within the subnet's prefix
+ *  - we cut the byte-length - no need for all-zero suffixes
  */
 static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[32])
 {
@@ -838,10 +850,6 @@ static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[3
 		return kr_error(EINVAL);
 	const uint8_t *a = (const uint8_t *)/*sign*/kr_inaddr(addr);
 
-	// Algo: interleave bits of the address.  Bit pairs:
-	//  - 00 -> beyond the subnet's prefix
-	//  - 10 -> zero bit within the subnet's prefix
-	//  - 11 ->  one bit within the subnet's prefix
 	int i;
 	// Let's hope that compiler optimizes this into something reasonable.
 	for (i = 0; sub_len > 0; ++i, sub_len -= 8) {