1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Handle firewalling 4 * Linux ethernet bridge 5 * 6 * Authors: 7 * Lennert Buytenhek <buytenh@gnu.org> 8 * Bart De Schuymer <bdschuym@pandora.be> 9 * 10 * Lennert dedicates this file to Kerstin Wurdinger. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/slab.h> 16 #include <linux/ip.h> 17 #include <linux/netdevice.h> 18 #include <linux/skbuff.h> 19 #include <linux/if_arp.h> 20 #include <linux/if_ether.h> 21 #include <linux/if_vlan.h> 22 #include <linux/if_pppox.h> 23 #include <linux/ppp_defs.h> 24 #include <linux/netfilter_bridge.h> 25 #include <uapi/linux/netfilter_bridge.h> 26 #include <linux/netfilter_ipv4.h> 27 #include <linux/netfilter_ipv6.h> 28 #include <linux/netfilter_arp.h> 29 #include <linux/in_route.h> 30 #include <linux/rculist.h> 31 #include <linux/inetdevice.h> 32 33 #include <net/ip.h> 34 #include <net/ipv6.h> 35 #include <net/addrconf.h> 36 #include <net/route.h> 37 #include <net/netfilter/br_netfilter.h> 38 #include <net/netns/generic.h> 39 40 #include <linux/uaccess.h> 41 #include "br_private.h" 42 #ifdef CONFIG_SYSCTL 43 #include <linux/sysctl.h> 44 #endif 45 46 static unsigned int brnf_net_id __read_mostly; 47 48 struct brnf_net { 49 bool enabled; 50 51 #ifdef CONFIG_SYSCTL 52 struct ctl_table_header *ctl_hdr; 53 #endif 54 55 /* default value is 1 */ 56 int call_iptables; 57 int call_ip6tables; 58 int call_arptables; 59 60 /* default value is 0 */ 61 int filter_vlan_tagged; 62 int filter_pppoe_tagged; 63 int pass_vlan_indev; 64 }; 65 66 #define IS_IP(skb) \ 67 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) 68 69 #define IS_IPV6(skb) \ 70 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) 71 72 #define IS_ARP(skb) \ 73 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) 74 75 static inline __be16 vlan_proto(const struct sk_buff *skb) 76 { 77 if (skb_vlan_tag_present(skb)) 78 return skb->protocol; 79 else if (skb->protocol == htons(ETH_P_8021Q)) 80 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 81 else 82 return 0; 83 } 84 85 static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) 86 { 87 struct brnf_net *brnet = net_generic(net, brnf_net_id); 88 89 return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; 90 } 91 92 static inline bool is_vlan_ipv6(const struct sk_buff *skb, 93 const struct net *net) 94 { 95 struct brnf_net *brnet = net_generic(net, brnf_net_id); 96 97 return vlan_proto(skb) == htons(ETH_P_IPV6) && 98 brnet->filter_vlan_tagged; 99 } 100 101 static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) 102 { 103 struct brnf_net *brnet = net_generic(net, brnf_net_id); 104 105 return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; 106 } 107 108 static inline __be16 pppoe_proto(const struct sk_buff *skb) 109 { 110 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 111 sizeof(struct pppoe_hdr))); 112 } 113 114 static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) 115 { 116 struct brnf_net *brnet = net_generic(net, brnf_net_id); 117 118 return skb->protocol == htons(ETH_P_PPP_SES) && 119 pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; 120 } 121 122 static inline bool is_pppoe_ipv6(const struct sk_buff *skb, 123 const struct net *net) 124 { 125 struct brnf_net *brnet = net_generic(net, brnf_net_id); 126 127 return skb->protocol == htons(ETH_P_PPP_SES) && 128 pppoe_proto(skb) == htons(PPP_IPV6) && 129 brnet->filter_pppoe_tagged; 130 } 131 132 /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 133 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 134 135 struct brnf_frag_data { 136 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 137 u8 encap_size; 138 u8 size; 139 u16 vlan_tci; 140 __be16 vlan_proto; 141 }; 142 143 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 144 145 static void nf_bridge_info_free(struct sk_buff *skb) 146 { 147 skb_ext_del(skb, SKB_EXT_BRIDGE_NF); 148 } 149 150 static inline struct net_device *bridge_parent(const struct net_device *dev) 151 { 152 struct net_bridge_port *port; 153 154 port = br_port_get_rcu(dev); 155 return port ? port->br->dev : NULL; 156 } 157 158 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 159 { 160 return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); 161 } 162 163 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 164 { 165 switch (skb->protocol) { 166 case __cpu_to_be16(ETH_P_8021Q): 167 return VLAN_HLEN; 168 case __cpu_to_be16(ETH_P_PPP_SES): 169 return PPPOE_SES_HLEN; 170 default: 171 return 0; 172 } 173 } 174 175 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 176 { 177 unsigned int len = nf_bridge_encap_header_len(skb); 178 179 skb_pull(skb, len); 180 skb->network_header += len; 181 } 182 183 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 184 { 185 unsigned int len = nf_bridge_encap_header_len(skb); 186 187 skb_pull_rcsum(skb, len); 188 skb->network_header += len; 189 } 190 191 /* When handing a packet over to the IP layer 192 * check whether we have a skb that is in the 193 * expected format 194 */ 195 196 static int br_validate_ipv4(struct net *net, struct sk_buff *skb) 197 { 198 const struct iphdr *iph; 199 u32 len; 200 201 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 202 goto inhdr_error; 203 204 iph = ip_hdr(skb); 205 206 /* Basic sanity checks */ 207 if (iph->ihl < 5 || iph->version != 4) 208 goto inhdr_error; 209 210 if (!pskb_may_pull(skb, iph->ihl*4)) 211 goto inhdr_error; 212 213 iph = ip_hdr(skb); 214 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 215 goto csum_error; 216 217 len = skb_ip_totlen(skb); 218 if (skb->len < len) { 219 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 220 goto drop; 221 } else if (len < (iph->ihl*4)) 222 goto inhdr_error; 223 224 if (pskb_trim_rcsum(skb, len)) { 225 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 226 goto drop; 227 } 228 229 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 230 /* We should really parse IP options here but until 231 * somebody who actually uses IP options complains to 232 * us we'll just silently ignore the options because 233 * we're lazy! 234 */ 235 return 0; 236 237 csum_error: 238 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 239 inhdr_error: 240 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 241 drop: 242 return -1; 243 } 244 245 void nf_bridge_update_protocol(struct sk_buff *skb) 246 { 247 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 248 249 switch (nf_bridge->orig_proto) { 250 case BRNF_PROTO_8021Q: 251 skb->protocol = htons(ETH_P_8021Q); 252 break; 253 case BRNF_PROTO_PPPOE: 254 skb->protocol = htons(ETH_P_PPP_SES); 255 break; 256 case BRNF_PROTO_UNCHANGED: 257 break; 258 } 259 } 260 261 /* Obtain the correct destination MAC address, while preserving the original 262 * source MAC address. If we already know this address, we just copy it. If we 263 * don't, we use the neighbour framework to find out. In both cases, we make 264 * sure that br_handle_frame_finish() is called afterwards. 265 */ 266 int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) 267 { 268 struct neighbour *neigh; 269 struct dst_entry *dst; 270 271 skb->dev = bridge_parent(skb->dev); 272 if (!skb->dev) 273 goto free_skb; 274 dst = skb_dst(skb); 275 neigh = dst_neigh_lookup_skb(dst, skb); 276 if (neigh) { 277 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 278 int ret; 279 280 if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && 281 READ_ONCE(neigh->hh.hh_len)) { 282 neigh_hh_bridge(&neigh->hh, skb); 283 skb->dev = nf_bridge->physindev; 284 ret = br_handle_frame_finish(net, sk, skb); 285 } else { 286 /* the neighbour function below overwrites the complete 287 * MAC header, so we save the Ethernet source address and 288 * protocol number. 289 */ 290 skb_copy_from_linear_data_offset(skb, 291 -(ETH_HLEN-ETH_ALEN), 292 nf_bridge->neigh_header, 293 ETH_HLEN-ETH_ALEN); 294 /* tell br_dev_xmit to continue with forwarding */ 295 nf_bridge->bridged_dnat = 1; 296 /* FIXME Need to refragment */ 297 ret = READ_ONCE(neigh->output)(neigh, skb); 298 } 299 neigh_release(neigh); 300 return ret; 301 } 302 free_skb: 303 kfree_skb(skb); 304 return 0; 305 } 306 307 static inline bool 308 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, 309 const struct nf_bridge_info *nf_bridge) 310 { 311 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 312 } 313 314 /* This requires some explaining. If DNAT has taken place, 315 * we will need to fix up the destination Ethernet address. 316 * This is also true when SNAT takes place (for the reply direction). 317 * 318 * There are two cases to consider: 319 * 1. The packet was DNAT'ed to a device in the same bridge 320 * port group as it was received on. We can still bridge 321 * the packet. 322 * 2. The packet was DNAT'ed to a different device, either 323 * a non-bridged device or another bridge port group. 324 * The packet will need to be routed. 325 * 326 * The correct way of distinguishing between these two cases is to 327 * call ip_route_input() and to look at skb->dst->dev, which is 328 * changed to the destination device if ip_route_input() succeeds. 329 * 330 * Let's first consider the case that ip_route_input() succeeds: 331 * 332 * If the output device equals the logical bridge device the packet 333 * came in on, we can consider this bridging. The corresponding MAC 334 * address will be obtained in br_nf_pre_routing_finish_bridge. 335 * Otherwise, the packet is considered to be routed and we just 336 * change the destination MAC address so that the packet will 337 * later be passed up to the IP stack to be routed. For a redirected 338 * packet, ip_route_input() will give back the localhost as output device, 339 * which differs from the bridge device. 340 * 341 * Let's now consider the case that ip_route_input() fails: 342 * 343 * This can be because the destination address is martian, in which case 344 * the packet will be dropped. 345 * If IP forwarding is disabled, ip_route_input() will fail, while 346 * ip_route_output_key() can return success. The source 347 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 348 * thinks we're handling a locally generated packet and won't care 349 * if IP forwarding is enabled. If the output device equals the logical bridge 350 * device, we proceed as if ip_route_input() succeeded. If it differs from the 351 * logical bridge port or if ip_route_output_key() fails we drop the packet. 352 */ 353 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 354 { 355 struct net_device *dev = skb->dev; 356 struct iphdr *iph = ip_hdr(skb); 357 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 358 struct rtable *rt; 359 int err; 360 361 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 362 363 if (nf_bridge->pkt_otherhost) { 364 skb->pkt_type = PACKET_OTHERHOST; 365 nf_bridge->pkt_otherhost = false; 366 } 367 nf_bridge->in_prerouting = 0; 368 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 369 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 370 struct in_device *in_dev = __in_dev_get_rcu(dev); 371 372 /* If err equals -EHOSTUNREACH the error is due to a 373 * martian destination or due to the fact that 374 * forwarding is disabled. For most martian packets, 375 * ip_route_output_key() will fail. It won't fail for 2 types of 376 * martian destinations: loopback destinations and destination 377 * 0.0.0.0. In both cases the packet will be dropped because the 378 * destination is the loopback device and not the bridge. */ 379 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 380 goto free_skb; 381 382 rt = ip_route_output(net, iph->daddr, 0, 383 RT_TOS(iph->tos), 0); 384 if (!IS_ERR(rt)) { 385 /* - Bridged-and-DNAT'ed traffic doesn't 386 * require ip_forwarding. */ 387 if (rt->dst.dev == dev) { 388 skb_dst_drop(skb); 389 skb_dst_set(skb, &rt->dst); 390 goto bridged_dnat; 391 } 392 ip_rt_put(rt); 393 } 394 free_skb: 395 kfree_skb(skb); 396 return 0; 397 } else { 398 if (skb_dst(skb)->dev == dev) { 399 bridged_dnat: 400 skb->dev = nf_bridge->physindev; 401 nf_bridge_update_protocol(skb); 402 nf_bridge_push_encap_header(skb); 403 br_nf_hook_thresh(NF_BR_PRE_ROUTING, 404 net, sk, skb, skb->dev, 405 NULL, 406 br_nf_pre_routing_finish_bridge); 407 return 0; 408 } 409 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 410 skb->pkt_type = PACKET_HOST; 411 } 412 } else { 413 rt = bridge_parent_rtable(nf_bridge->physindev); 414 if (!rt) { 415 kfree_skb(skb); 416 return 0; 417 } 418 skb_dst_drop(skb); 419 skb_dst_set_noref(skb, &rt->dst); 420 } 421 422 skb->dev = nf_bridge->physindev; 423 nf_bridge_update_protocol(skb); 424 nf_bridge_push_encap_header(skb); 425 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, 426 br_handle_frame_finish); 427 return 0; 428 } 429 430 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, 431 const struct net_device *dev, 432 const struct net *net) 433 { 434 struct net_device *vlan, *br; 435 struct brnf_net *brnet = net_generic(net, brnf_net_id); 436 437 br = bridge_parent(dev); 438 439 if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 440 return br; 441 442 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 443 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 444 445 return vlan ? vlan : br; 446 } 447 448 /* Some common code for IPv4/IPv6 */ 449 struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) 450 { 451 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 452 453 if (skb->pkt_type == PACKET_OTHERHOST) { 454 skb->pkt_type = PACKET_HOST; 455 nf_bridge->pkt_otherhost = true; 456 } 457 458 nf_bridge->in_prerouting = 1; 459 nf_bridge->physindev = skb->dev; 460 skb->dev = brnf_get_logical_dev(skb, skb->dev, net); 461 462 if (skb->protocol == htons(ETH_P_8021Q)) 463 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 464 else if (skb->protocol == htons(ETH_P_PPP_SES)) 465 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 466 467 /* Must drop socket now because of tproxy. */ 468 skb_orphan(skb); 469 return skb->dev; 470 } 471 472 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 473 * Replicate the checks that IPv4 does on packet reception. 474 * Set skb->dev to the bridge device (i.e. parent of the 475 * receiving device) to make netfilter happy, the REDIRECT 476 * target in particular. Save the original destination IP 477 * address to be able to detect DNAT afterwards. */ 478 static unsigned int br_nf_pre_routing(void *priv, 479 struct sk_buff *skb, 480 const struct nf_hook_state *state) 481 { 482 struct nf_bridge_info *nf_bridge; 483 struct net_bridge_port *p; 484 struct net_bridge *br; 485 __u32 len = nf_bridge_encap_header_len(skb); 486 struct brnf_net *brnet; 487 488 if (unlikely(!pskb_may_pull(skb, len))) 489 return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); 490 491 p = br_port_get_rcu(state->in); 492 if (p == NULL) 493 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); 494 br = p->br; 495 496 brnet = net_generic(state->net, brnf_net_id); 497 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 498 is_pppoe_ipv6(skb, state->net)) { 499 if (!brnet->call_ip6tables && 500 !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) 501 return NF_ACCEPT; 502 if (!ipv6_mod_enabled()) { 503 pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); 504 return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0); 505 } 506 507 nf_bridge_pull_encap_header_rcsum(skb); 508 return br_nf_pre_routing_ipv6(priv, skb, state); 509 } 510 511 if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) 512 return NF_ACCEPT; 513 514 if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && 515 !is_pppoe_ip(skb, state->net)) 516 return NF_ACCEPT; 517 518 nf_bridge_pull_encap_header_rcsum(skb); 519 520 if (br_validate_ipv4(state->net, skb)) 521 return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); 522 523 if (!nf_bridge_alloc(skb)) 524 return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); 525 if (!setup_pre_routing(skb, state->net)) 526 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); 527 528 nf_bridge = nf_bridge_info_get(skb); 529 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; 530 531 skb->protocol = htons(ETH_P_IP); 532 skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; 533 534 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, 535 skb->dev, NULL, 536 br_nf_pre_routing_finish); 537 538 return NF_STOLEN; 539 } 540 541 542 /* PF_BRIDGE/FORWARD *************************************************/ 543 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 544 { 545 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 546 struct net_device *in; 547 548 if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { 549 550 if (skb->protocol == htons(ETH_P_IP)) 551 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 552 553 if (skb->protocol == htons(ETH_P_IPV6)) 554 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; 555 556 in = nf_bridge->physindev; 557 if (nf_bridge->pkt_otherhost) { 558 skb->pkt_type = PACKET_OTHERHOST; 559 nf_bridge->pkt_otherhost = false; 560 } 561 nf_bridge_update_protocol(skb); 562 } else { 563 in = *((struct net_device **)(skb->cb)); 564 } 565 nf_bridge_push_encap_header(skb); 566 567 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, 568 br_forward_finish); 569 return 0; 570 } 571 572 573 static unsigned int br_nf_forward_ip(struct sk_buff *skb, 574 const struct nf_hook_state *state, 575 u8 pf) 576 { 577 struct nf_bridge_info *nf_bridge; 578 struct net_device *parent; 579 580 nf_bridge = nf_bridge_info_get(skb); 581 if (!nf_bridge) 582 return NF_ACCEPT; 583 584 /* Need exclusive nf_bridge_info since we might have multiple 585 * different physoutdevs. */ 586 if (!nf_bridge_unshare(skb)) 587 return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); 588 589 nf_bridge = nf_bridge_info_get(skb); 590 if (!nf_bridge) 591 return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); 592 593 parent = bridge_parent(state->out); 594 if (!parent) 595 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); 596 597 nf_bridge_pull_encap_header(skb); 598 599 if (skb->pkt_type == PACKET_OTHERHOST) { 600 skb->pkt_type = PACKET_HOST; 601 nf_bridge->pkt_otherhost = true; 602 } 603 604 if (pf == NFPROTO_IPV4) { 605 if (br_validate_ipv4(state->net, skb)) 606 return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); 607 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 608 skb->protocol = htons(ETH_P_IP); 609 } else if (pf == NFPROTO_IPV6) { 610 if (br_validate_ipv6(state->net, skb)) 611 return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); 612 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 613 skb->protocol = htons(ETH_P_IPV6); 614 } else { 615 WARN_ON_ONCE(1); 616 return NF_DROP; 617 } 618 619 nf_bridge->physoutdev = skb->dev; 620 621 NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, 622 brnf_get_logical_dev(skb, state->in, state->net), 623 parent, br_nf_forward_finish); 624 625 return NF_STOLEN; 626 } 627 628 static unsigned int br_nf_forward_arp(struct sk_buff *skb, 629 const struct nf_hook_state *state) 630 { 631 struct net_bridge_port *p; 632 struct net_bridge *br; 633 struct net_device **d = (struct net_device **)(skb->cb); 634 struct brnf_net *brnet; 635 636 p = br_port_get_rcu(state->out); 637 if (p == NULL) 638 return NF_ACCEPT; 639 br = p->br; 640 641 brnet = net_generic(state->net, brnf_net_id); 642 if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) 643 return NF_ACCEPT; 644 645 if (is_vlan_arp(skb, state->net)) 646 nf_bridge_pull_encap_header(skb); 647 648 if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) 649 return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); 650 651 if (arp_hdr(skb)->ar_pln != 4) { 652 if (is_vlan_arp(skb, state->net)) 653 nf_bridge_push_encap_header(skb); 654 return NF_ACCEPT; 655 } 656 *d = state->in; 657 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, 658 state->in, state->out, br_nf_forward_finish); 659 660 return NF_STOLEN; 661 } 662 663 /* This is the 'purely bridged' case. For IP, we pass the packet to 664 * netfilter with indev and outdev set to the bridge device, 665 * but we are still able to filter on the 'real' indev/outdev 666 * because of the physdev module. For ARP, indev and outdev are the 667 * bridge ports. 668 */ 669 static unsigned int br_nf_forward(void *priv, 670 struct sk_buff *skb, 671 const struct nf_hook_state *state) 672 { 673 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 674 is_pppoe_ip(skb, state->net)) 675 return br_nf_forward_ip(skb, state, NFPROTO_IPV4); 676 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 677 is_pppoe_ipv6(skb, state->net)) 678 return br_nf_forward_ip(skb, state, NFPROTO_IPV6); 679 if (IS_ARP(skb) || is_vlan_arp(skb, state->net)) 680 return br_nf_forward_arp(skb, state); 681 682 return NF_ACCEPT; 683 } 684 685 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 686 { 687 struct brnf_frag_data *data; 688 int err; 689 690 data = this_cpu_ptr(&brnf_frag_data_storage); 691 err = skb_cow_head(skb, data->size); 692 693 if (err) { 694 kfree_skb(skb); 695 return 0; 696 } 697 698 if (data->vlan_proto) 699 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 700 701 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 702 __skb_push(skb, data->encap_size); 703 704 nf_bridge_info_free(skb); 705 return br_dev_queue_push_xmit(net, sk, skb); 706 } 707 708 static int 709 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 710 int (*output)(struct net *, struct sock *, struct sk_buff *)) 711 { 712 unsigned int mtu = ip_skb_dst_mtu(sk, skb); 713 struct iphdr *iph = ip_hdr(skb); 714 715 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 716 (IPCB(skb)->frag_max_size && 717 IPCB(skb)->frag_max_size > mtu))) { 718 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 719 kfree_skb(skb); 720 return -EMSGSIZE; 721 } 722 723 return ip_do_fragment(net, sk, skb, output); 724 } 725 726 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 727 { 728 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 729 730 if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 731 return PPPOE_SES_HLEN; 732 return 0; 733 } 734 735 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 736 { 737 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 738 unsigned int mtu, mtu_reserved; 739 740 mtu_reserved = nf_bridge_mtu_reduction(skb); 741 mtu = skb->dev->mtu; 742 743 if (nf_bridge->pkt_otherhost) { 744 skb->pkt_type = PACKET_OTHERHOST; 745 nf_bridge->pkt_otherhost = false; 746 } 747 748 if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) 749 mtu = nf_bridge->frag_max_size; 750 751 nf_bridge_update_protocol(skb); 752 nf_bridge_push_encap_header(skb); 753 754 if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { 755 nf_bridge_info_free(skb); 756 return br_dev_queue_push_xmit(net, sk, skb); 757 } 758 759 /* This is wrong! We should preserve the original fragment 760 * boundaries by preserving frag_list rather than refragmenting. 761 */ 762 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && 763 skb->protocol == htons(ETH_P_IP)) { 764 struct brnf_frag_data *data; 765 766 if (br_validate_ipv4(net, skb)) 767 goto drop; 768 769 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 770 771 data = this_cpu_ptr(&brnf_frag_data_storage); 772 773 if (skb_vlan_tag_present(skb)) { 774 data->vlan_tci = skb->vlan_tci; 775 data->vlan_proto = skb->vlan_proto; 776 } else { 777 data->vlan_proto = 0; 778 } 779 780 data->encap_size = nf_bridge_encap_header_len(skb); 781 data->size = ETH_HLEN + data->encap_size; 782 783 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 784 data->size); 785 786 return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); 787 } 788 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && 789 skb->protocol == htons(ETH_P_IPV6)) { 790 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 791 struct brnf_frag_data *data; 792 793 if (br_validate_ipv6(net, skb)) 794 goto drop; 795 796 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 797 798 data = this_cpu_ptr(&brnf_frag_data_storage); 799 data->encap_size = nf_bridge_encap_header_len(skb); 800 data->size = ETH_HLEN + data->encap_size; 801 802 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 803 data->size); 804 805 if (v6ops) 806 return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); 807 808 kfree_skb(skb); 809 return -EMSGSIZE; 810 } 811 nf_bridge_info_free(skb); 812 return br_dev_queue_push_xmit(net, sk, skb); 813 drop: 814 kfree_skb(skb); 815 return 0; 816 } 817 818 /* PF_BRIDGE/POST_ROUTING ********************************************/ 819 static unsigned int br_nf_post_routing(void *priv, 820 struct sk_buff *skb, 821 const struct nf_hook_state *state) 822 { 823 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 824 struct net_device *realoutdev = bridge_parent(skb->dev); 825 u_int8_t pf; 826 827 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 828 * on a bridge, but was delivered locally and is now being routed: 829 * 830 * POST_ROUTING was already invoked from the ip stack. 831 */ 832 if (!nf_bridge || !nf_bridge->physoutdev) 833 return NF_ACCEPT; 834 835 if (!realoutdev) 836 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); 837 838 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 839 is_pppoe_ip(skb, state->net)) 840 pf = NFPROTO_IPV4; 841 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 842 is_pppoe_ipv6(skb, state->net)) 843 pf = NFPROTO_IPV6; 844 else 845 return NF_ACCEPT; 846 847 if (skb->pkt_type == PACKET_OTHERHOST) { 848 skb->pkt_type = PACKET_HOST; 849 nf_bridge->pkt_otherhost = true; 850 } 851 852 nf_bridge_pull_encap_header(skb); 853 if (pf == NFPROTO_IPV4) 854 skb->protocol = htons(ETH_P_IP); 855 else 856 skb->protocol = htons(ETH_P_IPV6); 857 858 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, 859 NULL, realoutdev, 860 br_nf_dev_queue_xmit); 861 862 return NF_STOLEN; 863 } 864 865 /* IP/SABOTAGE *****************************************************/ 866 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 867 * for the second time. */ 868 static unsigned int ip_sabotage_in(void *priv, 869 struct sk_buff *skb, 870 const struct nf_hook_state *state) 871 { 872 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 873 874 if (nf_bridge) { 875 if (nf_bridge->sabotage_in_done) 876 return NF_ACCEPT; 877 878 if (!nf_bridge->in_prerouting && 879 !netif_is_l3_master(skb->dev) && 880 !netif_is_l3_slave(skb->dev)) { 881 nf_bridge->sabotage_in_done = 1; 882 state->okfn(state->net, state->sk, skb); 883 return NF_STOLEN; 884 } 885 } 886 887 return NF_ACCEPT; 888 } 889 890 /* This is called when br_netfilter has called into iptables/netfilter, 891 * and DNAT has taken place on a bridge-forwarded packet. 892 * 893 * neigh->output has created a new MAC header, with local br0 MAC 894 * as saddr. 895 * 896 * This restores the original MAC saddr of the bridged packet 897 * before invoking bridge forward logic to transmit the packet. 898 */ 899 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 900 { 901 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 902 903 skb_pull(skb, ETH_HLEN); 904 nf_bridge->bridged_dnat = 0; 905 906 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 907 908 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 909 nf_bridge->neigh_header, 910 ETH_HLEN - ETH_ALEN); 911 skb->dev = nf_bridge->physindev; 912 913 nf_bridge->physoutdev = NULL; 914 br_handle_frame_finish(dev_net(skb->dev), NULL, skb); 915 } 916 917 static int br_nf_dev_xmit(struct sk_buff *skb) 918 { 919 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 920 921 if (nf_bridge && nf_bridge->bridged_dnat) { 922 br_nf_pre_routing_finish_bridge_slow(skb); 923 return 1; 924 } 925 return 0; 926 } 927 928 static const struct nf_br_ops br_ops = { 929 .br_dev_xmit_hook = br_nf_dev_xmit, 930 }; 931 932 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 933 * br_dev_queue_push_xmit is called afterwards */ 934 static const struct nf_hook_ops br_nf_ops[] = { 935 { 936 .hook = br_nf_pre_routing, 937 .pf = NFPROTO_BRIDGE, 938 .hooknum = NF_BR_PRE_ROUTING, 939 .priority = NF_BR_PRI_BRNF, 940 }, 941 { 942 .hook = br_nf_forward, 943 .pf = NFPROTO_BRIDGE, 944 .hooknum = NF_BR_FORWARD, 945 .priority = NF_BR_PRI_BRNF, 946 }, 947 { 948 .hook = br_nf_post_routing, 949 .pf = NFPROTO_BRIDGE, 950 .hooknum = NF_BR_POST_ROUTING, 951 .priority = NF_BR_PRI_LAST, 952 }, 953 { 954 .hook = ip_sabotage_in, 955 .pf = NFPROTO_IPV4, 956 .hooknum = NF_INET_PRE_ROUTING, 957 .priority = NF_IP_PRI_FIRST, 958 }, 959 { 960 .hook = ip_sabotage_in, 961 .pf = NFPROTO_IPV6, 962 .hooknum = NF_INET_PRE_ROUTING, 963 .priority = NF_IP6_PRI_FIRST, 964 }, 965 }; 966 967 static int brnf_device_event(struct notifier_block *unused, unsigned long event, 968 void *ptr) 969 { 970 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 971 struct brnf_net *brnet; 972 struct net *net; 973 int ret; 974 975 if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) 976 return NOTIFY_DONE; 977 978 ASSERT_RTNL(); 979 980 net = dev_net(dev); 981 brnet = net_generic(net, brnf_net_id); 982 if (brnet->enabled) 983 return NOTIFY_OK; 984 985 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 986 if (ret) 987 return NOTIFY_BAD; 988 989 brnet->enabled = true; 990 return NOTIFY_OK; 991 } 992 993 static struct notifier_block brnf_notifier __read_mostly = { 994 .notifier_call = brnf_device_event, 995 }; 996 997 /* recursively invokes nf_hook_slow (again), skipping already-called 998 * hooks (< NF_BR_PRI_BRNF). 999 * 1000 * Called with rcu read lock held. 1001 */ 1002 int br_nf_hook_thresh(unsigned int hook, struct net *net, 1003 struct sock *sk, struct sk_buff *skb, 1004 struct net_device *indev, 1005 struct net_device *outdev, 1006 int (*okfn)(struct net *, struct sock *, 1007 struct sk_buff *)) 1008 { 1009 const struct nf_hook_entries *e; 1010 struct nf_hook_state state; 1011 struct nf_hook_ops **ops; 1012 unsigned int i; 1013 int ret; 1014 1015 e = rcu_dereference(net->nf.hooks_bridge[hook]); 1016 if (!e) 1017 return okfn(net, sk, skb); 1018 1019 ops = nf_hook_entries_get_hook_ops(e); 1020 for (i = 0; i < e->num_hook_entries; i++) { 1021 /* These hooks have already been called */ 1022 if (ops[i]->priority < NF_BR_PRI_BRNF) 1023 continue; 1024 1025 /* These hooks have not been called yet, run them. */ 1026 if (ops[i]->priority > NF_BR_PRI_BRNF) 1027 break; 1028 1029 /* take a closer look at NF_BR_PRI_BRNF. */ 1030 if (ops[i]->hook == br_nf_pre_routing) { 1031 /* This hook diverted the skb to this function, 1032 * hooks after this have not been run yet. 1033 */ 1034 i++; 1035 break; 1036 } 1037 } 1038 1039 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, 1040 sk, net, okfn); 1041 1042 ret = nf_hook_slow(skb, &state, e, i); 1043 if (ret == 1) 1044 ret = okfn(net, sk, skb); 1045 1046 return ret; 1047 } 1048 1049 #ifdef CONFIG_SYSCTL 1050 static 1051 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 1052 void *buffer, size_t *lenp, loff_t *ppos) 1053 { 1054 int ret; 1055 1056 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1057 1058 if (write && *(int *)(ctl->data)) 1059 *(int *)(ctl->data) = 1; 1060 return ret; 1061 } 1062 1063 static struct ctl_table brnf_table[] = { 1064 { 1065 .procname = "bridge-nf-call-arptables", 1066 .maxlen = sizeof(int), 1067 .mode = 0644, 1068 .proc_handler = brnf_sysctl_call_tables, 1069 }, 1070 { 1071 .procname = "bridge-nf-call-iptables", 1072 .maxlen = sizeof(int), 1073 .mode = 0644, 1074 .proc_handler = brnf_sysctl_call_tables, 1075 }, 1076 { 1077 .procname = "bridge-nf-call-ip6tables", 1078 .maxlen = sizeof(int), 1079 .mode = 0644, 1080 .proc_handler = brnf_sysctl_call_tables, 1081 }, 1082 { 1083 .procname = "bridge-nf-filter-vlan-tagged", 1084 .maxlen = sizeof(int), 1085 .mode = 0644, 1086 .proc_handler = brnf_sysctl_call_tables, 1087 }, 1088 { 1089 .procname = "bridge-nf-filter-pppoe-tagged", 1090 .maxlen = sizeof(int), 1091 .mode = 0644, 1092 .proc_handler = brnf_sysctl_call_tables, 1093 }, 1094 { 1095 .procname = "bridge-nf-pass-vlan-input-dev", 1096 .maxlen = sizeof(int), 1097 .mode = 0644, 1098 .proc_handler = brnf_sysctl_call_tables, 1099 }, 1100 { } 1101 }; 1102 1103 static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) 1104 { 1105 brnf->call_iptables = 1; 1106 brnf->call_ip6tables = 1; 1107 brnf->call_arptables = 1; 1108 brnf->filter_vlan_tagged = 0; 1109 brnf->filter_pppoe_tagged = 0; 1110 brnf->pass_vlan_indev = 0; 1111 } 1112 1113 static int br_netfilter_sysctl_init_net(struct net *net) 1114 { 1115 struct ctl_table *table = brnf_table; 1116 struct brnf_net *brnet; 1117 1118 if (!net_eq(net, &init_net)) { 1119 table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); 1120 if (!table) 1121 return -ENOMEM; 1122 } 1123 1124 brnet = net_generic(net, brnf_net_id); 1125 table[0].data = &brnet->call_arptables; 1126 table[1].data = &brnet->call_iptables; 1127 table[2].data = &brnet->call_ip6tables; 1128 table[3].data = &brnet->filter_vlan_tagged; 1129 table[4].data = &brnet->filter_pppoe_tagged; 1130 table[5].data = &brnet->pass_vlan_indev; 1131 1132 br_netfilter_sysctl_default(brnet); 1133 1134 brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table, 1135 ARRAY_SIZE(brnf_table)); 1136 if (!brnet->ctl_hdr) { 1137 if (!net_eq(net, &init_net)) 1138 kfree(table); 1139 1140 return -ENOMEM; 1141 } 1142 1143 return 0; 1144 } 1145 1146 static void br_netfilter_sysctl_exit_net(struct net *net, 1147 struct brnf_net *brnet) 1148 { 1149 struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; 1150 1151 unregister_net_sysctl_table(brnet->ctl_hdr); 1152 if (!net_eq(net, &init_net)) 1153 kfree(table); 1154 } 1155 1156 static int __net_init brnf_init_net(struct net *net) 1157 { 1158 return br_netfilter_sysctl_init_net(net); 1159 } 1160 #endif 1161 1162 static void __net_exit brnf_exit_net(struct net *net) 1163 { 1164 struct brnf_net *brnet; 1165 1166 brnet = net_generic(net, brnf_net_id); 1167 if (brnet->enabled) { 1168 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1169 brnet->enabled = false; 1170 } 1171 1172 #ifdef CONFIG_SYSCTL 1173 br_netfilter_sysctl_exit_net(net, brnet); 1174 #endif 1175 } 1176 1177 static struct pernet_operations brnf_net_ops __read_mostly = { 1178 #ifdef CONFIG_SYSCTL 1179 .init = brnf_init_net, 1180 #endif 1181 .exit = brnf_exit_net, 1182 .id = &brnf_net_id, 1183 .size = sizeof(struct brnf_net), 1184 }; 1185 1186 static int __init br_netfilter_init(void) 1187 { 1188 int ret; 1189 1190 ret = register_pernet_subsys(&brnf_net_ops); 1191 if (ret < 0) 1192 return ret; 1193 1194 ret = register_netdevice_notifier(&brnf_notifier); 1195 if (ret < 0) { 1196 unregister_pernet_subsys(&brnf_net_ops); 1197 return ret; 1198 } 1199 1200 RCU_INIT_POINTER(nf_br_ops, &br_ops); 1201 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1202 return 0; 1203 } 1204 1205 static void __exit br_netfilter_fini(void) 1206 { 1207 RCU_INIT_POINTER(nf_br_ops, NULL); 1208 unregister_netdevice_notifier(&brnf_notifier); 1209 unregister_pernet_subsys(&brnf_net_ops); 1210 } 1211 1212 module_init(br_netfilter_init); 1213 module_exit(br_netfilter_fini); 1214 1215 MODULE_LICENSE("GPL"); 1216 MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); 1217 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 1218 MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); 1219