diff --git a/CHANGELOG.md b/CHANGELOG.md index c4dd16d..d77abc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). --- ---- +### +# [0.8.19] - 2024-09-08 +- Add masquerade/reverse_masquerade map garbage collection to ```zfw.c -L -G, --list-gc-sessions``` which is now added to + /etc/cron.d/zfw_refresh as well so it will run once every 60 seconds unless modified. +- Fixed issue where icmp unreachable messages were not working for ipv4 masqueraded tcp/udp sessions, which was introduced when dynamic PAT was added. + ### # [0.8.18] - 2024-09-07 - Add removal of udp state upon receipt of DNS reply from server for passthrough tracking / Masquerade diff --git a/README.md b/README.md index b23436b..6b1e92f 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,10 @@ zfw can now provide native IPv4/IPv6 masquerade operation for outbound pass thro ```sudo zfw -k, --masquerade ``` This function requires that both ingress and egress TC filters are enabled on outbound interface. For IPv4 this is now using Dynamic PAT and IPv6 is using -static PAT. Note: When running on later kernels i.e. 6+ some older network hardware may not work with ebpf Dynamic PAT. +static PAT. Note: When running on later kernels i.e. 6+ some older network hardware may not work with ebpf Dynamic PAT. We have also seen some incompatibility with 2.5Gb interfaces on 5.x+ kernels. + +In release v0.8.19 masquerade session gc was added to /etc/cron.d/zfw_refresh via ```/opt/openziti/bin/zfw -L -G > /dev/null``` and runs once per minute. Stale udp sessions will be +removed if over 30s and stale tcp sessions will be removed if over 3600 seconds (1hr). 
### Explicit Deny Rules This feature adds the ability to enter explicit deny rules by appending ```-d, --disable``` to the ```-I, --insert rule``` to either ingress or egress rules. Rule precedence is based on longest match prefix. If the prefix is the same then the precedence follows the order entry of the rules, which when listed will go from top to bottom for ports with in the same prefix e.g. diff --git a/files/scripts/zfw_refresh b/files/scripts/zfw_refresh index 97d50c1..69c872f 100644 --- a/files/scripts/zfw_refresh +++ b/files/scripts/zfw_refresh @@ -1,2 +1,3 @@ * * * * * root /opt/openziti/bin/zfw -L -E > /dev/null +* * * * * root /opt/openziti/bin/zfw -L -G > /dev/null diff --git a/src/zfw.c b/src/zfw.c index 19952b9..64465dc 100644 --- a/src/zfw.c +++ b/src/zfw.c @@ -102,6 +102,7 @@ bool ddos = false; bool add = false; bool delete = false; bool list = false; +bool list_gc = false; bool flush = false; bool lpt = false; bool hpt = false; @@ -246,7 +247,7 @@ char *direction_string; char *masq_interface; char check_alt[IF_NAMESIZE]; -const char *argp_program_version = "0.8.18"; +const char *argp_program_version = "0.8.19"; struct ring_buffer *ring_buffer; __u32 if_list[MAX_IF_LIST_ENTRIES]; @@ -258,6 +259,79 @@ struct interface uint32_t addresses[MAX_ADDRESSES]; }; +/*Key to masquerade_map*/ +struct masq_key { + uint32_t ifindex; + union { + __u32 ip; + __u32 ip6[4]; + }__in46_u_dest; + __u8 protocol; + __u16 sport; + __u16 dport; +}; + +/*value to masquerade_map and icmp_masquerade_map*/ +struct masq_value { + union { + __u32 ip; + __u32 ip6[4]; + }__in46_u_origin; + __u16 o_sport; +}; + +/*Key to masquerade_reverse_map*/ +struct masq_reverse_key { + uint32_t ifindex; + union { + __u32 ip; + __u32 ip6[4]; + }__in46_u_src; + union { + __u32 ip; + __u32 ip6[4]; + }__in46_u_dest; + __u8 protocol; + __u16 sport; + __u16 dport; +}; + +/*Key to tcp_map/udp_map*/ +struct tuple_key { + union { + __u32 ip; + __u32 ip6[4]; + }__in46_u_dst; + union { + __u32 ip; + 
__u32 ip6[4]; + }__in46_u_src; + __u16 sport; + __u16 dport; + __u32 ifindex; + __u8 type; +}; + +/*Value to tcp_map*/ +struct tcp_state { + unsigned long long tstamp; + __u32 sfseq; + __u32 cfseq; + __u8 syn; + __u8 sfin; + __u8 cfin; + __u8 sfack; + __u8 cfack; + __u8 ack; + __u8 rst; + __u8 est; +}; + +/*Value to udp_map*/ +struct udp_state { + unsigned long long tstamp; +}; + struct interface6 { uint32_t index; @@ -4568,6 +4642,304 @@ void map_delete() if_list_ext_delete_key(port_ext_key); } +struct masq_value get_reverse_masquerade(struct masq_reverse_key key){ + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)masquerade_reverse_map_path; + map.bpf_fd = 0; + struct masq_value mstate = {0}; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + return mstate; + } + map.map_fd = fd; + map.key = (uint64_t)&key; + map.value = (uint64_t)&mstate; + syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + close(fd); + return mstate; +} + +struct masq_value get_masquerade(struct masq_key key){ + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)masquerade_map_path; + map.bpf_fd = 0; + struct masq_value mstate = {0}; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + return mstate; + } + map.map_fd = fd; + map.key = (uint64_t)&key; + map.value = (uint64_t)&mstate; + syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + close(fd); + return mstate; +} + +void del_reverse_masq(struct masq_reverse_key key){ + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)masquerade_reverse_map_path; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + return; + } + // delete element with specified key + map.map_fd = fd; + map.key = (uint64_t)&key; + 
syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &map, sizeof(map)); + close(fd); +} + +void del_masq(struct masq_key key){ + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)masquerade_map_path; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + return; + } + // delete element with specified key + map.map_fd = fd; + map.key = (uint64_t)&key; + syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &map, sizeof(map)); + close(fd); +} + +void tcp_egress_map_delete_key(struct tuple_key *key) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)tcp_map_path; + map.bpf_fd = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + close_maps(1); + } + map.map_fd = fd; + map.key = (uint64_t)key; + struct tcp_state tstate = {0}; + map.value = (uint64_t)&tstate; + int lookup = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + if(!lookup){ + //delete masquerade state if tstamp is more than 3600 seconds old + struct masq_reverse_key rk = {0}; + rk.dport = key->dport; + rk.sport = key->sport; + rk.ifindex = key->ifindex; + rk.__in46_u_dest.ip = key->__in46_u_dst.ip; + rk.__in46_u_src.ip = key->__in46_u_src.ip; + rk.protocol = IPPROTO_TCP; + struct masq_value rv = get_reverse_masquerade(rk); + if(rv.o_sport){ + char *saddr = nitoa(ntohl(key->__in46_u_src.ip)); + char *daddr = nitoa(ntohl(key->__in46_u_dst.ip)); + if(saddr && daddr){ + printf("found tcp egress masquerade -> source: %s | dest: %s | sport: %d | dport: %d, ifindex: %u age (sec): %lld\n" + , saddr, daddr, ntohs(key->sport), ntohs(key->dport), key->ifindex, + ((long long)((ts.tv_sec * 1000000000) + ts.tv_nsec) - tstate.tstamp)/1000000000); + } + if(saddr){ + free(saddr); + } + if(daddr){ + free(daddr); + } + if((((ts.tv_sec * 1000000000) + ts.tv_nsec) - 
tstate.tstamp) > 3600000000000){ + struct masq_reverse_key rk = {0}; + rk.dport = key->dport; + rk.sport = key->sport; + rk.ifindex = key->ifindex; + rk.__in46_u_dest.ip = key->__in46_u_dst.ip; + rk.__in46_u_src.ip = key->__in46_u_src.ip; + rk.protocol = IPPROTO_TCP; + struct masq_value rv = get_reverse_masquerade(rk); + if(rv.o_sport){ + + struct masq_key mk = {0}; + mk.dport = key->dport; + mk.sport = rv.o_sport; + mk.__in46_u_dest.ip = key->__in46_u_dst.ip; + mk.ifindex = key->ifindex; + mk.protocol = IPPROTO_TCP; + del_masq(mk); + } + del_reverse_masq(rk); + } + } + } + close(fd); +} + +void tcp_ipv6_egress_map_delete_key(struct tuple_key *key) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)tcp_map_path; + map.bpf_fd = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + close_maps(1); + } + map.map_fd = fd; + map.key = (uint64_t)key; + struct tcp_state tstate = {0}; + map.value = (uint64_t)&tstate; + int lookup = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + if(!lookup){ + struct masq_key mk = {0}; + mk.dport = key->dport; + mk.sport = key->sport; + memcpy(mk.__in46_u_dest.ip6, key->__in46_u_dst.ip6, sizeof(key->__in46_u_dst.ip6)); + mk.ifindex = key->ifindex; + mk.protocol = IPPROTO_TCP; + struct masq_value mv = get_masquerade(mk); + //delete masquerade state if tstamp is more than 3600 seconds old + if(mv.__in46_u_origin.ip6[0]){ + char saddr6[INET6_ADDRSTRLEN]; + char daddr6[INET6_ADDRSTRLEN]; + struct in6_addr saddr_6 = {0}; + struct in6_addr daddr_6 = {0}; + memcpy(saddr_6.__in6_u.__u6_addr32, key->__in46_u_src.ip6, sizeof(key->__in46_u_src.ip6)); + memcpy(daddr_6.__in6_u.__u6_addr32, key->__in46_u_dst.ip6, sizeof(key->__in46_u_dst.ip6)); + inet_ntop(AF_INET6, &saddr_6, saddr6, INET6_ADDRSTRLEN); + inet_ntop(AF_INET6, &daddr_6, daddr6, INET6_ADDRSTRLEN); + 
printf("found ipv6 tcp egress masquerade -> source: %s | dest: %s | sport: %d | dport: %d, ifindex: %u age (sec): %lld\n" + , saddr6, daddr6, ntohs(key->sport), ntohs(key->dport), key->ifindex, + ((long long)((ts.tv_sec * 1000000000) + ts.tv_nsec) - tstate.tstamp)/1000000000); + if((((ts.tv_sec * 1000000000) + ts.tv_nsec) - tstate.tstamp) > 3600000000000){ + del_masq(mk); + } + } + } + close(fd); +} + +void udp_egress_map_delete_key(struct tuple_key *key) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)udp_map_path; + map.bpf_fd = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + close_maps(1); + } + map.map_fd = fd; + map.key = (uint64_t)key; + struct udp_state ustate = {0}; + map.value = (uint64_t)&ustate; + int lookup = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + if(!lookup){ + //delete masquerade state if tstamp is more than 30 seconds old + struct masq_reverse_key rk = {0}; + rk.dport = key->dport; + rk.sport = key->sport; + rk.ifindex = key->ifindex; + rk.__in46_u_dest.ip = key->__in46_u_dst.ip; + rk.__in46_u_src.ip = key->__in46_u_src.ip; + rk.protocol = IPPROTO_UDP; + struct masq_value rv = get_reverse_masquerade(rk); + if(rv.o_sport){ + char *saddr = nitoa(ntohl(key->__in46_u_src.ip)); + char *daddr = nitoa(ntohl(key->__in46_u_dst.ip)); + if(saddr && daddr){ + printf("found udp egress masquerade -> source: %s | dest: %s | sport: %d | dport: %d, ifindex: %u age (sec): %lld\n" + , saddr, daddr, ntohs(key->sport), ntohs(key->dport), key->ifindex, + ((long long)((ts.tv_sec * 1000000000) + ts.tv_nsec) - ustate.tstamp)/1000000000); + } + if(saddr){ + free(saddr); + } + if(daddr){ + free(daddr); + } + if(((((ts.tv_sec * 1000000000) + ts.tv_nsec) - ustate.tstamp) > 30000000000) && rv.o_sport) + { + struct masq_key mk = {0}; + mk.dport = key->dport; + mk.sport = 
rv.o_sport; + mk.__in46_u_dest.ip = key->__in46_u_dst.ip; + mk.ifindex = key->ifindex; + mk.protocol = IPPROTO_UDP; + del_masq(mk); + del_reverse_masq(rk); + } + } + } + close(fd); +} + +void udp_ipv6_egress_map_delete_key(struct tuple_key *key) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + union bpf_attr map; + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)udp_map_path; + map.bpf_fd = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s\n", strerror(errno)); + close_maps(1); + } + map.map_fd = fd; + map.key = (uint64_t)key; + struct udp_state ustate = {0}; + map.value = (uint64_t)&ustate; + int lookup = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &map, sizeof(map)); + if(!lookup){ + struct masq_key mk = {0}; + mk.dport = key->dport; + mk.sport = key->sport; + memcpy(mk.__in46_u_dest.ip6, key->__in46_u_dst.ip6, sizeof(key->__in46_u_dst.ip6)); + mk.ifindex = key->ifindex; + mk.protocol = IPPROTO_UDP; + struct masq_value mv = get_masquerade(mk); + //delete masquerade state if tstamp is more than 30 seconds old + if(mv.__in46_u_origin.ip6[0]){ + char saddr6[INET6_ADDRSTRLEN]; + char daddr6[INET6_ADDRSTRLEN]; + struct in6_addr saddr_6 = {0}; + struct in6_addr daddr_6 = {0}; + memcpy(saddr_6.__in6_u.__u6_addr32, key->__in46_u_src.ip6, sizeof(key->__in46_u_src.ip6)); + memcpy(daddr_6.__in6_u.__u6_addr32, key->__in46_u_dst.ip6, sizeof(key->__in46_u_dst.ip6)); + inet_ntop(AF_INET6, &saddr_6, saddr6, INET6_ADDRSTRLEN); + inet_ntop(AF_INET6, &daddr_6, daddr6, INET6_ADDRSTRLEN); + printf("found ipv6 udp egress masquerade -> source: %s | dest: %s | sport: %d | dport: %d, ifindex: %u age (sec): %lld\n" + , saddr6, daddr6, ntohs(key->sport), ntohs(key->dport), key->ifindex, + ((long long)((ts.tv_sec * 1000000000) + ts.tv_nsec) - ustate.tstamp)/1000000000); + if((((ts.tv_sec * 1000000000) + ts.tv_nsec) - ustate.tstamp) > 30000000000){ + del_masq(mk); + } + } + } + close(fd); +} + void map_flush6() 
{ union bpf_attr map; @@ -5115,6 +5487,96 @@ void map_flush() } } +int flush_udp_egress() +{ + union bpf_attr map; + struct tuple_key init_key = {0}; + struct tuple_key *key = &init_key; + struct tuple_key current_key = {0}; + struct udp_state ostate; + // Open BPF udp_map + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)udp_map_path; + map.bpf_fd = 0; + map.file_flags = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s \n", strerror(errno)); + return 1; + } + map.map_fd = fd; + map.key = (uint64_t)key; + map.value = (uint64_t)&ostate; + int ret = 0; + while (true) + { + ret = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &map, sizeof(map)); + if (ret == -1) + { + break; + } + map.key = map.next_key; + current_key = *(struct tuple_key *)map.key; + struct tuple_key *pass_key = malloc(sizeof(struct tuple_key)); + memcpy(pass_key,¤t_key, sizeof(struct tuple_key)); + if(current_key.type == 4){ + udp_egress_map_delete_key(pass_key); + } + else{ + udp_ipv6_egress_map_delete_key(pass_key); + } + free(pass_key); + } + close(fd); + return 0; +} + +int flush_tcp_egress() +{ + union bpf_attr map; + struct tuple_key init_key = {0}; + struct tuple_key *key = &init_key; + struct tuple_key current_key = {0}; + struct udp_state ostate; + // Open BPF tcp_map + memset(&map, 0, sizeof(map)); + map.pathname = (uint64_t)tcp_map_path; + map.bpf_fd = 0; + map.file_flags = 0; + int fd = syscall(__NR_bpf, BPF_OBJ_GET, &map, sizeof(map)); + if (fd == -1) + { + printf("BPF_OBJ_GET: %s \n", strerror(errno)); + return 1; + } + map.map_fd = fd; + map.key = (uint64_t)key; + map.value = (uint64_t)&ostate; + int ret = 0; + while (true) + { + ret = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &map, sizeof(map)); + if (ret == -1) + { + break; + } + map.key = map.next_key; + current_key = *(struct tuple_key *)map.key; + struct tuple_key *pass_key = malloc(sizeof(struct tuple_key)); + memcpy(pass_key,¤t_key, sizeof(struct tuple_key)); + 
if(current_key.type == 4){ + tcp_egress_map_delete_key(pass_key); + } + else{ + tcp_ipv6_egress_map_delete_key(pass_key); + } + free(pass_key); + } + close(fd); + return 0; +} + void map_list() { union bpf_attr map; @@ -5578,6 +6040,7 @@ void map_list_all() static struct argp_option options[] = { {"delete", 'D', NULL, 0, "Delete map rule", 0}, {"list-diag", 'E', NULL, 0, "", 0}, + {"list-gc-sessions", 'G', NULL, 0, "", 0}, {"flush", 'F', NULL, 0, "Flush all map rules", 0}, {"insert", 'I', NULL, 0, "Insert map rule", 0}, {"list", 'L', NULL, 0, "List map rules", 0}, @@ -5634,6 +6097,9 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state) case 'F': flush = true; break; + case 'G': + list_gc = true; + break; case 'I': add = true; break; @@ -6580,6 +7046,11 @@ int main(int argc, char **argv) usage("-E, --list-diag requires -L --list"); } + if (list_gc && !list) + { + usage("-G, --list-gc-sessions requires -L --list"); + } + if ((tun && (echo || ssh_disable || verbose || per_interface || add || delete || list || flush || tcfilter))) { usage("-T, --set-tun-mode cannot be set as a part of combination call to zfw"); @@ -6842,6 +7313,16 @@ int main(int argc, char **argv) interface_diag(); close_maps(0); } + if (list_gc) + { + if (cd || dl || cs || sl || prot || ddos_saddr_list || list_diag) + { + usage("-G, --list-gc-sessions cannot be combined with other list arguments -E,-c,-o, -m, -n, -p, -Y"); + } + flush_udp_egress(); + flush_tcp_egress(); + close_maps(0); + } if (ddos_saddr_list) { if (cd || dl || cs || sl || prot || ddos_dport_list) diff --git a/src/zfw_monitor.c b/src/zfw_monitor.c index d7eb3e3..40cbe07 100644 --- a/src/zfw_monitor.c +++ b/src/zfw_monitor.c @@ -85,7 +85,7 @@ char check_alt[IF_NAMESIZE]; char doc[] = "zfw_monitor -- ebpf firewall monitor tool"; const char *rb_map_path = "/sys/fs/bpf/tc/globals/rb_map"; const char *tproxy_map_path = "/sys/fs/bpf/tc/globals/zt_tproxy_map"; -const char *argp_program_version = "0.8.18"; +const char 
*argp_program_version = "0.8.19"; union bpf_attr rb_map; int rb_fd = -1; diff --git a/src/zfw_tc_ingress.c b/src/zfw_tc_ingress.c index 8d18557..8571243 100644 --- a/src/zfw_tc_ingress.c +++ b/src/zfw_tc_ingress.c @@ -185,6 +185,8 @@ struct tuple_key { }__in46_u_src; __u16 sport; __u16 dport; + __u32 ifindex; + __u8 type; }; /*Key to icmp_echo_map*/ @@ -622,7 +624,7 @@ struct { /*tracks udp and tcp masquerade*/ struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(struct masq_key)); __uint(value_size,sizeof(struct masq_value)); __uint(max_entries, BPF_MAX_SESSIONS * 2); @@ -631,7 +633,7 @@ struct { /*stores reverse lookup table udp and tcp masquerade*/ struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(struct masq_reverse_key)); __uint(value_size,sizeof(struct masq_value)); __uint(max_entries, BPF_MAX_SESSIONS * 2); @@ -1418,8 +1420,20 @@ int bpf_sk_splice(struct __sk_buff *skb){ } __u16 otcpcheck = itcph->check; int flags = BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | BPF_F_PSEUDO_HDR; + struct l4_change_fields{ + __u32 saddr; + __u16 sport; + }; + struct l4_change_fields old_fields = {0}; + struct l4_change_fields new_fields = {0}; + old_fields.saddr = local_ip4->ipaddr[0]; + old_fields.sport = mk.sport; + new_fields.saddr = mv->__in46_u_origin.ip; + new_fields.sport = mv->o_sport; + __u32 l4_sum_tcp = bpf_csum_diff((__u32 *)&old_fields, sizeof(old_fields), (__u32 *)&new_fields, sizeof(new_fields), 0); + itcph->source = mv->o_sport; bpf_l4_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct icmphdr) + inner_iph->ihl *4 + - offsetof(struct tcphdr, check), 0, l3_sum, flags | 0); + offsetof(struct tcphdr, check), 0, l4_sum_tcp, flags); iph = (struct iphdr *)(skb->data + sizeof(*eth)); if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ return TC_ACT_SHOT; @@ -1442,8 +1456,18 @@ int bpf_sk_splice(struct __sk_buff *skb){ if 
((unsigned long)(itcph + 1) > (unsigned long)skb->data_end){ return TC_ACT_SHOT; } - __u32 l4_sum = bpf_csum_diff((__u32 *)&otcpcheck, sizeof(__u32),(__u32 *)&itcph->check, sizeof(__u32), 0); - bpf_l4_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + offsetof(struct icmphdr, checksum), 0, l4_sum, flags | 0); + struct icmp_l4_change_fields{ + __u16 sport; + __u16 check; + }; + struct icmp_l4_change_fields old_icmp_fields = {0}; + struct icmp_l4_change_fields new_icmp_fields = {0}; + old_icmp_fields.sport = mk.sport; + old_icmp_fields.check = otcpcheck; + new_icmp_fields.sport = mv->o_sport; + new_icmp_fields.check = itcph->check; + __u32 l4_sum_icmp = bpf_csum_diff((__u32 *)&old_icmp_fields, sizeof(old_icmp_fields),(__u32 *)&new_icmp_fields, sizeof(new_icmp_fields), 0); + bpf_l4_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + offsetof(struct icmphdr, checksum), 0, l4_sum_icmp, flags | 0); iph = (struct iphdr *)(skb->data + sizeof(*eth)); if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ return TC_ACT_SHOT; @@ -1475,6 +1499,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ tk.__in46_u_src.ip = iph->daddr; tk.dport = o_session->ipv4.dport; tk.sport = o_session->ipv4.sport; + tk.ifindex = event.ifindex; + tk.type = 4; struct tcp_state *ts = get_tcp(tk); if(ts){ return TC_ACT_OK; @@ -1550,8 +1576,7 @@ int bpf_sk_splice(struct __sk_buff *skb){ return TC_ACT_SHOT; } l3_sum = bpf_csum_diff((__u32 *)&inner_iph->saddr, sizeof(__u32),(__u32 *)&mv->__in46_u_origin.ip, sizeof(__u32), 0); - inner_iph->saddr = mv->__in46_u_origin.ip; - + inner_iph->saddr = mv->__in46_u_origin.ip; bpf_l3_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct icmphdr) + offsetof(struct iphdr, check), 0, l3_sum, 0); iph = (struct iphdr *)(skb->data + sizeof(*eth)); if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ @@ -1571,6 +1596,94 @@ int bpf_sk_splice(struct __sk_buff *skb){ } return TC_ACT_SHOT; } + struct udphdr *iudph 
= (struct udphdr *)((unsigned long)inner_iph + sizeof(*inner_iph)); + if ((unsigned long)(iudph + 1) > (unsigned long)skb->data_end){ + return TC_ACT_SHOT; + } + u_session = (struct udp_v4_tuple *)(void*)(long)&inner_iph->saddr; + if ((unsigned long)(u_session + 1) > (unsigned long)skb->data_end){ + event.error_code = IP_TUPLE_TOO_BIG; + send_event(&event); + return TC_ACT_SHOT; + } + __u16 oudpcheck = iudph->check; + int flags = BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | BPF_F_PSEUDO_HDR; + struct l4_change_fields{ + __u32 saddr; + __u16 sport; + }; + struct l4_change_fields old_fields = {0}; + struct l4_change_fields new_fields = {0}; + old_fields.saddr = local_ip4->ipaddr[0]; + old_fields.sport = mk.sport; + new_fields.saddr = mv->__in46_u_origin.ip; + new_fields.sport = mv->o_sport; + __u32 l4_sum_udp = bpf_csum_diff((__u32 *)&old_fields, sizeof(old_fields), (__u32 *)&new_fields, sizeof(new_fields), 0); + iudph->source = mv->o_sport; + bpf_l4_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct icmphdr) + inner_iph->ihl *4 + + offsetof(struct udphdr, check), 0, l4_sum_udp, flags); + iph = (struct iphdr *)(skb->data + sizeof(*eth)); + if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ + return TC_ACT_SHOT; + } + icmph = (struct icmphdr *)((unsigned long)iph + sizeof(*iph)); + if ((unsigned long)(icmph + 1) > (unsigned long)skb->data_end){ + event.error_code = ICMP_HEADER_TOO_BIG; + send_event(&event); + return TC_ACT_SHOT; + } + inner_iph = (struct iphdr *)((unsigned long)icmph + sizeof(*icmph)); + if ((unsigned long)(inner_iph + 1) > (unsigned long)skb->data_end){ + if(local_diag->verbose){ + event.error_code = ICMP_INNER_IP_HEADER_TOO_BIG; + send_event(&event); + } + return TC_ACT_SHOT; + } + iudph = (struct udphdr *)((unsigned long)inner_iph + sizeof(*inner_iph)); + if ((unsigned long)(iudph + 1) > (unsigned long)skb->data_end){ + return TC_ACT_SHOT; + } + u_session = (struct udp_v4_tuple *)(void*)(long)&inner_iph->saddr; 
+ if ((unsigned long)(u_session + 1) > (unsigned long)skb->data_end){ + event.error_code = IP_TUPLE_TOO_BIG; + send_event(&event); + return TC_ACT_SHOT; + } + struct icmp_l4_change_fields{ + __u16 sport; + __u16 check; + }; + struct icmp_l4_change_fields old_icmp_fields = {0}; + struct icmp_l4_change_fields new_icmp_fields = {0}; + old_icmp_fields.sport = mk.sport; + old_icmp_fields.check = oudpcheck; + new_icmp_fields.sport = mv->o_sport; + new_icmp_fields.check = iudph->check; + __u32 l4_sum_icmp = bpf_csum_diff((__u32 *)&old_icmp_fields, sizeof(old_icmp_fields),(__u32 *)&new_icmp_fields, sizeof(new_icmp_fields), 0); + bpf_l4_csum_replace(skb, sizeof(struct ethhdr) + sizeof(struct iphdr) + offsetof(struct icmphdr, checksum), 0, l4_sum_icmp, flags | 0); + iph = (struct iphdr *)(skb->data + sizeof(*eth)); + if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ + return TC_ACT_SHOT; + } + icmph = (struct icmphdr *)((unsigned long)iph + sizeof(*iph)); + if ((unsigned long)(icmph + 1) > (unsigned long)skb->data_end){ + event.error_code = ICMP_HEADER_TOO_BIG; + send_event(&event); + return TC_ACT_SHOT; + } + inner_iph = (struct iphdr *)((unsigned long)icmph + sizeof(*icmph)); + if ((unsigned long)(inner_iph + 1) > (unsigned long)skb->data_end){ + if(local_diag->verbose){ + event.error_code = ICMP_INNER_IP_HEADER_TOO_BIG; + send_event(&event); + } + return TC_ACT_SHOT; + } + iudph = (struct udphdr *)((unsigned long)inner_iph + sizeof(*inner_iph)); + if ((unsigned long)(iudph + 1) > (unsigned long)skb->data_end){ + return TC_ACT_SHOT; + } u_session = (struct udp_v4_tuple *)(void*)(long)&inner_iph->saddr; if ((unsigned long)(u_session + 1) > (unsigned long)skb->data_end){ event.error_code = IP_TUPLE_TOO_BIG; @@ -1607,6 +1720,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ uk.__in46_u_src.ip = iph->daddr; uk.dport = u_session->dport; uk.sport = u_session->sport; + uk.ifindex = event.ifindex; + uk.type = 4; struct udp_state *us = get_udp(uk); if(us){ return 
TC_ACT_OK; @@ -1932,6 +2047,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ tcp_state_key.__in46_u_src.ip = tuple->ipv4.daddr; tcp_state_key.sport = tuple->ipv4.dport; tcp_state_key.dport = tuple->ipv4.sport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 4; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate = get_tcp(tcp_state_key); /*check tcp state and timeout if greater than 60 minutes without traffic*/ @@ -2153,6 +2270,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ udp_state_key.__in46_u_src.ip = tuple->ipv4.daddr; udp_state_key.sport = tuple->ipv4.dport; udp_state_key.dport = tuple->ipv4.sport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 4; unsigned long long tstamp = bpf_ktime_get_ns(); struct udp_state *ustate = get_udp(udp_state_key); if(ustate){ @@ -2209,8 +2328,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ event.tracking_code = UDP_MATCHED_ACTIVE_STATE; send_event(&event); } - /*DNS state over after response so clear the state tables upon reply from server*/ - if(bpf_ntohs(udp_state_key.dport) == 53){ + /*DNS || NTP state over after response so clear the state tables upon reply from server*/ + if(bpf_ntohs(udp_state_key.dport) == 53 || bpf_ntohs(udp_state_key.dport) == 123){ if(local_diag->masquerade){ struct iphdr *iph = (struct iphdr *)(skb->data + sizeof(*eth)); if ((unsigned long)(iph + 1) > (unsigned long)skb->data_end){ @@ -2371,6 +2490,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ memcpy(tcp_state_key.__in46_u_src.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); tcp_state_key.sport = tuple->ipv6.dport; tcp_state_key.dport = tuple->ipv6.sport; + tcp_state_key.ifindex =event.ifindex; + tcp_state_key.type = 6; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate = get_tcp(tcp_state_key); /*check tcp state and timeout if greater than 60 minutes without traffic*/ @@ -2496,6 +2617,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ 
memcpy(udp_state_key.__in46_u_src.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); udp_state_key.sport = tuple->ipv6.dport; udp_state_key.dport = tuple->ipv6.sport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 6; unsigned long long tstamp = bpf_ktime_get_ns(); struct udp_state *ustate = get_udp(udp_state_key); if(ustate){ @@ -3501,6 +3624,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ udp_state_key.__in46_u_dst.ip = tuple->ipv4.daddr; udp_state_key.sport = tuple->ipv4.sport; udp_state_key.dport = tuple->ipv4.dport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 4; struct udp_state *ustate = get_udp_ingress(udp_state_key); if((!ustate) || (ustate->tstamp > (tstamp + 30000000000))){ struct udp_state us = { @@ -3527,6 +3652,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ tcp_state_key.__in46_u_dst.ip = tuple->ipv4.daddr; tcp_state_key.sport = tuple->ipv4.sport; tcp_state_key.dport = tuple->ipv4.dport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 4; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate; if(tcph->syn && !tcph->ack){ @@ -3639,6 +3766,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ memcpy(udp_state_key.__in46_u_dst.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); udp_state_key.sport = tuple->ipv6.sport; udp_state_key.dport = tuple->ipv6.dport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 6; struct udp_state *ustate = get_udp_ingress(udp_state_key); if((!ustate) || (ustate->tstamp > (tstamp + 30000000000))){ struct udp_state us = { @@ -3664,6 +3793,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ memcpy(tcp_state_key.__in46_u_dst.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); tcp_state_key.sport = tuple->ipv6.sport; tcp_state_key.dport = tuple->ipv6.dport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 6; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate; if(tcph->syn && !tcph->ack){ diff --git 
a/src/zfw_tc_outbound_track.c b/src/zfw_tc_outbound_track.c index 0fc48c3..78a3456 100644 --- a/src/zfw_tc_outbound_track.c +++ b/src/zfw_tc_outbound_track.c @@ -87,7 +87,7 @@ struct bpf_event{ unsigned char dest[6]; }; -/*Key to tcp_map and udp_map*/ +/*Key to tcp_map/udp_map*/ struct tuple_key { union { __u32 ip; @@ -99,6 +99,8 @@ struct tuple_key { }__in46_u_src; __u16 sport; __u16 dport; + __u32 ifindex; + __u8 type; }; /*Key to icmp_echo_map*/ @@ -468,7 +470,7 @@ struct { } icmp_echo_map SEC(".maps"); struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(struct masq_key)); __uint(value_size,sizeof(struct masq_value)); __uint(max_entries, BPF_MAX_SESSIONS * 2); @@ -477,7 +479,7 @@ struct { /*stores reverse lookup table udp and tcp masquerade*/ struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(struct masq_reverse_key)); __uint(value_size,sizeof(struct masq_value)); __uint(max_entries, BPF_MAX_SESSIONS * 2); @@ -1192,6 +1194,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ tcp_state_key.__in46_u_src.ip = tuple->ipv4.daddr; tcp_state_key.sport = tuple->ipv4.dport; tcp_state_key.dport = tuple->ipv4.sport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 4; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate = get_ingress_tcp(tcp_state_key); /*check tcp state and timeout if greater than 60 minutes without traffic*/ @@ -1264,6 +1268,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ udp_state_key.__in46_u_src.ip = tuple->ipv4.daddr; udp_state_key.sport = tuple->ipv4.dport; udp_state_key.dport = tuple->ipv4.sport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 4; unsigned long long tstamp = bpf_ktime_get_ns(); struct udp_state *ustate = get_udp_ingress(udp_state_key); if(ustate) @@ -1378,6 +1384,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ memcpy(tcp_state_key.__in46_u_src.ip6,tuple->ipv6.daddr, 
sizeof(tuple->ipv6.daddr)); tcp_state_key.sport = tuple->ipv6.dport; tcp_state_key.dport = tuple->ipv6.sport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 6; unsigned long long tstamp = bpf_ktime_get_ns(); struct tcp_state *tstate = get_ingress_tcp(tcp_state_key); /*check tcp state and timeout if greater than 60 minutes without traffic*/ @@ -1450,6 +1458,8 @@ int bpf_sk_splice(struct __sk_buff *skb){ memcpy(udp_state_key.__in46_u_src.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); udp_state_key.sport = tuple->ipv6.dport; udp_state_key.dport = tuple->ipv6.sport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 6; unsigned long long tstamp = bpf_ktime_get_ns(); struct udp_state *ustate = get_udp_ingress(udp_state_key); if(ustate){ @@ -2309,6 +2319,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ tcp_state_key.__in46_u_dst.ip = tuple->ipv4.daddr; tcp_state_key.sport = tuple->ipv4.sport; tcp_state_key.dport = tuple->ipv4.dport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 4; if(local_diag->masquerade && local_ip4 && local_ip4->count){ struct masq_reverse_key revk = {0}; revk.__in46_u_src.ip = tuple->ipv4.saddr; @@ -2558,6 +2570,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ udp_state_key.__in46_u_dst.ip = tuple->ipv4.daddr; udp_state_key.sport = tuple->ipv4.sport; udp_state_key.dport = tuple->ipv4.dport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 4; if(local_diag->masquerade && local_ip4 && local_ip4->count){ struct masq_reverse_key revk = {0}; revk.__in46_u_src.ip = tuple->ipv4.saddr; @@ -2726,6 +2740,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ memcpy(tcp_state_key.__in46_u_dst.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); tcp_state_key.sport = tuple->ipv6.sport; tcp_state_key.dport = tuple->ipv6.dport; + tcp_state_key.ifindex = event.ifindex; + tcp_state_key.type = 6; if(local_diag->masquerade && local_ip6 && local_ip6->count){ struct masq_value mv = {0}; 
memcpy(mv.__in46_u_origin.ip6, tuple->ipv6.saddr, sizeof(mv.__in46_u_origin.ip6)); @@ -2859,6 +2875,8 @@ int bpf_sk_splice6(struct __sk_buff *skb){ memcpy(udp_state_key.__in46_u_dst.ip6,tuple->ipv6.daddr, sizeof(tuple->ipv6.daddr)); udp_state_key.sport = tuple->ipv6.sport; udp_state_key.dport = tuple->ipv6.dport; + udp_state_key.ifindex = event.ifindex; + udp_state_key.type = 6; if(local_diag->masquerade && local_ip6 && local_ip6->count){ struct masq_value mv = {0}; memcpy(mv.__in46_u_origin.ip6, tuple->ipv6.saddr, sizeof(mv.__in46_u_origin.ip6));