Merge !1562: lib/rules: fix a bug in subnet computations

webfutureiorepo · Jul 22, 2024 · c7694f1 · c7694f1
2 parents 0432278 + 0bd1e71
commit c7694f1
Showing 1 changed file with 32 additions and 15 deletions.
diff --git a/lib/rules/api.c b/lib/rules/api.c
@@ -818,16 +818,28 @@ int kr_rule_local_subtree(const knot_dname_t *apex, enum kr_rule_sub_t type,
 }
 
 
-/** Encode a subnet into a (longer) string.
+/** Encode a subnet into a (longer) string.  The result is in `buf` with returned length.
  *
  * The point is to have different encodings for different subnets,
  * with using just byte-length strings (e.g. for ::/1 vs. ::/2).
- * And we need to preserve order: FIXME description
- *  - natural partial order on subnets, one included in another
- *  - partial order on strings, one being a prefix of another
- *  - implies lexicographical order on the encoded strings
+ * You might imagine this as the space of all nodes of a binary trie.
  *
- * Consequently, given a set of subnets, the t
+ * == Key properties ==
+ * We're utilizing the order on the encoded strings.  LMDB uses lexicographical order.
+ * Optimization: the properties should cut down LMDB operation count when searching
+ * for rule sets typical in practice.  Some properties:
+ *  - full address is just a subnet containing only that address (/128 and /32)
+ *  - order of full addresses is kept the same as before encoding
+ *  - ancestor first: if subnet B is included inside subnet A, we get A < B
+ *  - subnet mixing: if two subnets do not share any address, all addresses of one
+ *    of them are ordered before all addresses of the other one
+ *
+ * == The encoding ==
+ * The encoding replaces each address bit by a pair of bits:
+ *  - 00 -> beyond the subnet's prefix
+ *  - 10 -> zero bit within the subnet's prefix
+ *  - 11 ->  one bit within the subnet's prefix
+ *  - we cut the byte-length - no need for all-zero suffixes
  */
 static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[32])
 {
@@ -838,25 +850,30 @@ static int subnet_encode(const struct sockaddr *addr, int sub_len, uint8_t buf[3
 		return kr_error(EINVAL);
 	const uint8_t *a = (const uint8_t *)/*sign*/kr_inaddr(addr);
 
-	// Algo: interleave bits of the address.  Bit pairs:
-	//  - 00 -> beyond the subnet's prefix
-	//  - 10 -> zero bit within the subnet's prefix
-	//  - 11 ->  one bit within the subnet's prefix
-	// Multiplying one uint8_t by 01010101 (in binary) will do interleaving.
 	int i;
 	// Let's hope that compiler optimizes this into something reasonable.
 	for (i = 0; sub_len > 0; ++i, sub_len -= 8) {
-		uint16_t x = a[i] * 85; // interleave by zero bits
-		uint8_t sub_mask = 255 >> (8 - MIN(sub_len, 8));
-		uint16_t r = x | (sub_mask * 85 * 2);
+		// r = a[i] interleaved by 1 bits (with 1s on the higher-value positions)
+		// https://graphics.stanford.edu/~seander/bithacks.html#Interleave64bitOps
+		// but we modify it slightly: no need for the 0x5555 mask (==0b0101010101010101)
+		// or the y-part - we instead just set all odd bits to 1s.
+		uint16_t r = (
+			(a[i] * 0x0101010101010101ULL & 0x8040201008040201ULL)
+				* 0x0102040810204081ULL >> 49
+		        ) | 0xAAAAU/* = 0b1010'1010'1010'1010 */;
+		// now r might just need clipping
+		if (sub_len < 8) {
+			uint16_t mask = 0xFFFFffffU << (2 * (8 - sub_len));
+			r &= mask;
+		}
 		buf[(ssize_t)2*i] = r / 256;
 		buf[(ssize_t)2*i + 1] = r % 256;
 	}
 	return i * 2;
 }
 
 // Is `a` subnet-prefix of `b`?  (a byte format of subnet_encode())
-bool subnet_is_prefix(uint8_t a, uint8_t b)
+static bool subnet_is_prefix(uint8_t a, uint8_t b)
 {
 	while (true) {
 		if (a >> 6 == 0)