xref: /linux/net/netfilter/nf_nat_proto.c (revision 3bdab16c55f57a24245c97d707241dd9b48d1a91)
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8 
9 #include <linux/types.h>
10 #include <linux/export.h>
11 #include <linux/init.h>
12 #include <linux/udp.h>
13 #include <linux/tcp.h>
14 #include <linux/icmp.h>
15 #include <linux/icmpv6.h>
16 
17 #include <linux/dccp.h>
18 #include <linux/sctp.h>
19 #include <net/sctp/checksum.h>
20 
21 #include <linux/netfilter.h>
22 #include <net/netfilter/nf_nat.h>
23 
24 #include <linux/ipv6.h>
25 #include <linux/netfilter_ipv6.h>
26 #include <net/checksum.h>
27 #include <net/ip6_checksum.h>
28 #include <net/ip6_route.h>
29 #include <net/xfrm.h>
30 #include <net/ipv6.h>
31 
32 #include <net/netfilter/nf_conntrack_core.h>
33 #include <net/netfilter/nf_conntrack.h>
34 #include <linux/netfilter/nfnetlink_conntrack.h>
35 
36 static void nf_csum_update(struct sk_buff *skb,
37 			   unsigned int iphdroff, __sum16 *check,
38 			   const struct nf_conntrack_tuple *t,
39 			   enum nf_nat_manip_type maniptype);
40 
41 static void
42 __udp_manip_pkt(struct sk_buff *skb,
43 	        unsigned int iphdroff, struct udphdr *hdr,
44 	        const struct nf_conntrack_tuple *tuple,
45 	        enum nf_nat_manip_type maniptype, bool do_csum)
46 {
47 	__be16 *portptr, newport;
48 
49 	if (maniptype == NF_NAT_MANIP_SRC) {
50 		/* Get rid of src port */
51 		newport = tuple->src.u.udp.port;
52 		portptr = &hdr->source;
53 	} else {
54 		/* Get rid of dst port */
55 		newport = tuple->dst.u.udp.port;
56 		portptr = &hdr->dest;
57 	}
58 	if (do_csum) {
59 		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
60 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
61 					 false);
62 		if (!hdr->check)
63 			hdr->check = CSUM_MANGLED_0;
64 	}
65 	*portptr = newport;
66 }
67 
68 static bool udp_manip_pkt(struct sk_buff *skb,
69 			  unsigned int iphdroff, unsigned int hdroff,
70 			  const struct nf_conntrack_tuple *tuple,
71 			  enum nf_nat_manip_type maniptype)
72 {
73 	struct udphdr *hdr;
74 	bool do_csum;
75 
76 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
77 		return false;
78 
79 	hdr = (struct udphdr *)(skb->data + hdroff);
80 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
81 
82 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
83 	return true;
84 }
85 
86 static bool udplite_manip_pkt(struct sk_buff *skb,
87 			      unsigned int iphdroff, unsigned int hdroff,
88 			      const struct nf_conntrack_tuple *tuple,
89 			      enum nf_nat_manip_type maniptype)
90 {
91 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
92 	struct udphdr *hdr;
93 
94 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
95 		return false;
96 
97 	hdr = (struct udphdr *)(skb->data + hdroff);
98 	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
99 #endif
100 	return true;
101 }
102 
103 static bool
104 sctp_manip_pkt(struct sk_buff *skb,
105 	       unsigned int iphdroff, unsigned int hdroff,
106 	       const struct nf_conntrack_tuple *tuple,
107 	       enum nf_nat_manip_type maniptype)
108 {
109 #ifdef CONFIG_NF_CT_PROTO_SCTP
110 	struct sctphdr *hdr;
111 	int hdrsize = 8;
112 
113 	/* This could be an inner header returned in imcp packet; in such
114 	 * cases we cannot update the checksum field since it is outside
115 	 * of the 8 bytes of transport layer headers we are guaranteed.
116 	 */
117 	if (skb->len >= hdroff + sizeof(*hdr))
118 		hdrsize = sizeof(*hdr);
119 
120 	if (!skb_make_writable(skb, hdroff + hdrsize))
121 		return false;
122 
123 	hdr = (struct sctphdr *)(skb->data + hdroff);
124 
125 	if (maniptype == NF_NAT_MANIP_SRC) {
126 		/* Get rid of src port */
127 		hdr->source = tuple->src.u.sctp.port;
128 	} else {
129 		/* Get rid of dst port */
130 		hdr->dest = tuple->dst.u.sctp.port;
131 	}
132 
133 	if (hdrsize < sizeof(*hdr))
134 		return true;
135 
136 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
137 		hdr->checksum = sctp_compute_cksum(skb, hdroff);
138 		skb->ip_summed = CHECKSUM_NONE;
139 	}
140 
141 #endif
142 	return true;
143 }
144 
145 static bool
146 tcp_manip_pkt(struct sk_buff *skb,
147 	      unsigned int iphdroff, unsigned int hdroff,
148 	      const struct nf_conntrack_tuple *tuple,
149 	      enum nf_nat_manip_type maniptype)
150 {
151 	struct tcphdr *hdr;
152 	__be16 *portptr, newport, oldport;
153 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
154 
155 	/* this could be a inner header returned in icmp packet; in such
156 	   cases we cannot update the checksum field since it is outside of
157 	   the 8 bytes of transport layer headers we are guaranteed */
158 	if (skb->len >= hdroff + sizeof(struct tcphdr))
159 		hdrsize = sizeof(struct tcphdr);
160 
161 	if (!skb_make_writable(skb, hdroff + hdrsize))
162 		return false;
163 
164 	hdr = (struct tcphdr *)(skb->data + hdroff);
165 
166 	if (maniptype == NF_NAT_MANIP_SRC) {
167 		/* Get rid of src port */
168 		newport = tuple->src.u.tcp.port;
169 		portptr = &hdr->source;
170 	} else {
171 		/* Get rid of dst port */
172 		newport = tuple->dst.u.tcp.port;
173 		portptr = &hdr->dest;
174 	}
175 
176 	oldport = *portptr;
177 	*portptr = newport;
178 
179 	if (hdrsize < sizeof(*hdr))
180 		return true;
181 
182 	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
183 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
184 	return true;
185 }
186 
187 static bool
188 dccp_manip_pkt(struct sk_buff *skb,
189 	       unsigned int iphdroff, unsigned int hdroff,
190 	       const struct nf_conntrack_tuple *tuple,
191 	       enum nf_nat_manip_type maniptype)
192 {
193 #ifdef CONFIG_NF_CT_PROTO_DCCP
194 	struct dccp_hdr *hdr;
195 	__be16 *portptr, oldport, newport;
196 	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
197 
198 	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
199 		hdrsize = sizeof(struct dccp_hdr);
200 
201 	if (!skb_make_writable(skb, hdroff + hdrsize))
202 		return false;
203 
204 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
205 
206 	if (maniptype == NF_NAT_MANIP_SRC) {
207 		newport = tuple->src.u.dccp.port;
208 		portptr = &hdr->dccph_sport;
209 	} else {
210 		newport = tuple->dst.u.dccp.port;
211 		portptr = &hdr->dccph_dport;
212 	}
213 
214 	oldport = *portptr;
215 	*portptr = newport;
216 
217 	if (hdrsize < sizeof(*hdr))
218 		return true;
219 
220 	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
221 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
222 				 false);
223 #endif
224 	return true;
225 }
226 
227 static bool
228 icmp_manip_pkt(struct sk_buff *skb,
229 	       unsigned int iphdroff, unsigned int hdroff,
230 	       const struct nf_conntrack_tuple *tuple,
231 	       enum nf_nat_manip_type maniptype)
232 {
233 	struct icmphdr *hdr;
234 
235 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
236 		return false;
237 
238 	hdr = (struct icmphdr *)(skb->data + hdroff);
239 	inet_proto_csum_replace2(&hdr->checksum, skb,
240 				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
241 	hdr->un.echo.id = tuple->src.u.icmp.id;
242 	return true;
243 }
244 
245 static bool
246 icmpv6_manip_pkt(struct sk_buff *skb,
247 		 unsigned int iphdroff, unsigned int hdroff,
248 		 const struct nf_conntrack_tuple *tuple,
249 		 enum nf_nat_manip_type maniptype)
250 {
251 	struct icmp6hdr *hdr;
252 
253 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
254 		return false;
255 
256 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
257 	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
258 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
259 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
260 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
261 					 hdr->icmp6_identifier,
262 					 tuple->src.u.icmp.id, false);
263 		hdr->icmp6_identifier = tuple->src.u.icmp.id;
264 	}
265 	return true;
266 }
267 
268 /* manipulate a GRE packet according to maniptype */
269 static bool
270 gre_manip_pkt(struct sk_buff *skb,
271 	      unsigned int iphdroff, unsigned int hdroff,
272 	      const struct nf_conntrack_tuple *tuple,
273 	      enum nf_nat_manip_type maniptype)
274 {
275 #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
276 	const struct gre_base_hdr *greh;
277 	struct pptp_gre_header *pgreh;
278 
279 	/* pgreh includes two optional 32bit fields which are not required
280 	 * to be there.  That's where the magic '8' comes from */
281 	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
282 		return false;
283 
284 	greh = (void *)skb->data + hdroff;
285 	pgreh = (struct pptp_gre_header *)greh;
286 
287 	/* we only have destination manip of a packet, since 'source key'
288 	 * is not present in the packet itself */
289 	if (maniptype != NF_NAT_MANIP_DST)
290 		return true;
291 
292 	switch (greh->flags & GRE_VERSION) {
293 	case GRE_VERSION_0:
294 		/* We do not currently NAT any GREv0 packets.
295 		 * Try to behave like "nf_nat_proto_unknown" */
296 		break;
297 	case GRE_VERSION_1:
298 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
299 		pgreh->call_id = tuple->dst.u.gre.key;
300 		break;
301 	default:
302 		pr_debug("can't nat unknown GRE version\n");
303 		return false;
304 	}
305 #endif
306 	return true;
307 }
308 
309 static bool l4proto_manip_pkt(struct sk_buff *skb,
310 			      unsigned int iphdroff, unsigned int hdroff,
311 			      const struct nf_conntrack_tuple *tuple,
312 			      enum nf_nat_manip_type maniptype)
313 {
314 	switch (tuple->dst.protonum) {
315 	case IPPROTO_TCP:
316 		return tcp_manip_pkt(skb, iphdroff, hdroff,
317 				     tuple, maniptype);
318 	case IPPROTO_UDP:
319 		return udp_manip_pkt(skb, iphdroff, hdroff,
320 				     tuple, maniptype);
321 	case IPPROTO_UDPLITE:
322 		return udplite_manip_pkt(skb, iphdroff, hdroff,
323 					 tuple, maniptype);
324 	case IPPROTO_SCTP:
325 		return sctp_manip_pkt(skb, iphdroff, hdroff,
326 				      tuple, maniptype);
327 	case IPPROTO_ICMP:
328 		return icmp_manip_pkt(skb, iphdroff, hdroff,
329 				      tuple, maniptype);
330 	case IPPROTO_ICMPV6:
331 		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
332 					tuple, maniptype);
333 	case IPPROTO_DCCP:
334 		return dccp_manip_pkt(skb, iphdroff, hdroff,
335 				      tuple, maniptype);
336 	case IPPROTO_GRE:
337 		return gre_manip_pkt(skb, iphdroff, hdroff,
338 				     tuple, maniptype);
339 	}
340 
341 	/* If we don't know protocol -- no error, pass it unmodified. */
342 	return true;
343 }
344 
345 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
346 				  unsigned int iphdroff,
347 				  const struct nf_conntrack_tuple *target,
348 				  enum nf_nat_manip_type maniptype)
349 {
350 	struct iphdr *iph;
351 	unsigned int hdroff;
352 
353 	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
354 		return false;
355 
356 	iph = (void *)skb->data + iphdroff;
357 	hdroff = iphdroff + iph->ihl * 4;
358 
359 	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
360 		return false;
361 	iph = (void *)skb->data + iphdroff;
362 
363 	if (maniptype == NF_NAT_MANIP_SRC) {
364 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
365 		iph->saddr = target->src.u3.ip;
366 	} else {
367 		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
368 		iph->daddr = target->dst.u3.ip;
369 	}
370 	return true;
371 }
372 
373 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
374 				  unsigned int iphdroff,
375 				  const struct nf_conntrack_tuple *target,
376 				  enum nf_nat_manip_type maniptype)
377 {
378 #if IS_ENABLED(CONFIG_IPV6)
379 	struct ipv6hdr *ipv6h;
380 	__be16 frag_off;
381 	int hdroff;
382 	u8 nexthdr;
383 
384 	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
385 		return false;
386 
387 	ipv6h = (void *)skb->data + iphdroff;
388 	nexthdr = ipv6h->nexthdr;
389 	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
390 				  &nexthdr, &frag_off);
391 	if (hdroff < 0)
392 		goto manip_addr;
393 
394 	if ((frag_off & htons(~0x7)) == 0 &&
395 	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
396 		return false;
397 
398 	/* must reload, offset might have changed */
399 	ipv6h = (void *)skb->data + iphdroff;
400 
401 manip_addr:
402 	if (maniptype == NF_NAT_MANIP_SRC)
403 		ipv6h->saddr = target->src.u3.in6;
404 	else
405 		ipv6h->daddr = target->dst.u3.in6;
406 
407 #endif
408 	return true;
409 }
410 
411 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
412 			      enum nf_nat_manip_type mtype,
413 			      enum ip_conntrack_dir dir)
414 {
415 	struct nf_conntrack_tuple target;
416 
417 	/* We are aiming to look like inverse of other direction. */
418 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
419 
420 	switch (target.src.l3num) {
421 	case NFPROTO_IPV6:
422 		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
423 			return NF_ACCEPT;
424 		break;
425 	case NFPROTO_IPV4:
426 		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
427 			return NF_ACCEPT;
428 		break;
429 	default:
430 		WARN_ON_ONCE(1);
431 		break;
432 	}
433 
434 	return NF_DROP;
435 }
436 
437 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
438 				    unsigned int iphdroff, __sum16 *check,
439 				    const struct nf_conntrack_tuple *t,
440 				    enum nf_nat_manip_type maniptype)
441 {
442 	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
443 	__be32 oldip, newip;
444 
445 	if (maniptype == NF_NAT_MANIP_SRC) {
446 		oldip = iph->saddr;
447 		newip = t->src.u3.ip;
448 	} else {
449 		oldip = iph->daddr;
450 		newip = t->dst.u3.ip;
451 	}
452 	inet_proto_csum_replace4(check, skb, oldip, newip, true);
453 }
454 
455 static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
456 				    unsigned int iphdroff, __sum16 *check,
457 				    const struct nf_conntrack_tuple *t,
458 				    enum nf_nat_manip_type maniptype)
459 {
460 #if IS_ENABLED(CONFIG_IPV6)
461 	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
462 	const struct in6_addr *oldip, *newip;
463 
464 	if (maniptype == NF_NAT_MANIP_SRC) {
465 		oldip = &ipv6h->saddr;
466 		newip = &t->src.u3.in6;
467 	} else {
468 		oldip = &ipv6h->daddr;
469 		newip = &t->dst.u3.in6;
470 	}
471 	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
472 				  newip->s6_addr32, true);
473 #endif
474 }
475 
476 static void nf_csum_update(struct sk_buff *skb,
477 			   unsigned int iphdroff, __sum16 *check,
478 			   const struct nf_conntrack_tuple *t,
479 			   enum nf_nat_manip_type maniptype)
480 {
481 	switch (t->src.l3num) {
482 	case NFPROTO_IPV4:
483 		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
484 		return;
485 	case NFPROTO_IPV6:
486 		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
487 		return;
488 	}
489 }
490 
491 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
492 				    u8 proto, void *data, __sum16 *check,
493 				    int datalen, int oldlen)
494 {
495 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
496 		const struct iphdr *iph = ip_hdr(skb);
497 
498 		skb->ip_summed = CHECKSUM_PARTIAL;
499 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
500 			ip_hdrlen(skb);
501 		skb->csum_offset = (void *)check - data;
502 		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
503 					    proto, 0);
504 	} else {
505 		inet_proto_csum_replace2(check, skb,
506 					 htons(oldlen), htons(datalen), true);
507 	}
508 }
509 
510 #if IS_ENABLED(CONFIG_IPV6)
511 static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
512 				    u8 proto, void *data, __sum16 *check,
513 				    int datalen, int oldlen)
514 {
515 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
516 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
517 
518 		skb->ip_summed = CHECKSUM_PARTIAL;
519 		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
520 			(data - (void *)skb->data);
521 		skb->csum_offset = (void *)check - data;
522 		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
523 					  datalen, proto, 0);
524 	} else {
525 		inet_proto_csum_replace2(check, skb,
526 					 htons(oldlen), htons(datalen), true);
527 	}
528 }
529 #endif
530 
531 void nf_nat_csum_recalc(struct sk_buff *skb,
532 			u8 nfproto, u8 proto, void *data, __sum16 *check,
533 			int datalen, int oldlen)
534 {
535 	switch (nfproto) {
536 	case NFPROTO_IPV4:
537 		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
538 					datalen, oldlen);
539 		return;
540 #if IS_ENABLED(CONFIG_IPV6)
541 	case NFPROTO_IPV6:
542 		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
543 					datalen, oldlen);
544 		return;
545 #endif
546 	}
547 
548 	WARN_ON_ONCE(1);
549 }
550 
551 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
552 				  struct nf_conn *ct,
553 				  enum ip_conntrack_info ctinfo,
554 				  unsigned int hooknum)
555 {
556 	struct {
557 		struct icmphdr	icmp;
558 		struct iphdr	ip;
559 	} *inside;
560 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
561 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
562 	unsigned int hdrlen = ip_hdrlen(skb);
563 	struct nf_conntrack_tuple target;
564 	unsigned long statusbit;
565 
566 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
567 
568 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
569 		return 0;
570 	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
571 		return 0;
572 
573 	inside = (void *)skb->data + hdrlen;
574 	if (inside->icmp.type == ICMP_REDIRECT) {
575 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
576 			return 0;
577 		if (ct->status & IPS_NAT_MASK)
578 			return 0;
579 	}
580 
581 	if (manip == NF_NAT_MANIP_SRC)
582 		statusbit = IPS_SRC_NAT;
583 	else
584 		statusbit = IPS_DST_NAT;
585 
586 	/* Invert if this is reply direction */
587 	if (dir == IP_CT_DIR_REPLY)
588 		statusbit ^= IPS_NAT_MASK;
589 
590 	if (!(ct->status & statusbit))
591 		return 1;
592 
593 	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
594 				   &ct->tuplehash[!dir].tuple, !manip))
595 		return 0;
596 
597 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
598 		/* Reloading "inside" here since manip_pkt may reallocate */
599 		inside = (void *)skb->data + hdrlen;
600 		inside->icmp.checksum = 0;
601 		inside->icmp.checksum =
602 			csum_fold(skb_checksum(skb, hdrlen,
603 					       skb->len - hdrlen, 0));
604 	}
605 
606 	/* Change outer to look like the reply to an incoming packet */
607 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
608 	target.dst.protonum = IPPROTO_ICMP;
609 	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
610 		return 0;
611 
612 	return 1;
613 }
614 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
615 
616 static unsigned int
617 nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
618 	       const struct nf_hook_state *state)
619 {
620 	struct nf_conn *ct;
621 	enum ip_conntrack_info ctinfo;
622 
623 	ct = nf_ct_get(skb, &ctinfo);
624 	if (!ct)
625 		return NF_ACCEPT;
626 
627 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
628 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
629 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
630 							   state->hook))
631 				return NF_DROP;
632 			else
633 				return NF_ACCEPT;
634 		}
635 	}
636 
637 	return nf_nat_inet_fn(priv, skb, state);
638 }
639 
640 static unsigned int
641 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
642 	       const struct nf_hook_state *state)
643 {
644 	unsigned int ret;
645 	__be32 daddr = ip_hdr(skb)->daddr;
646 
647 	ret = nf_nat_ipv4_fn(priv, skb, state);
648 	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
649 		skb_dst_drop(skb);
650 
651 	return ret;
652 }
653 
/* IPv4 NAT hook for outgoing packets (see nf_nat_ipv4_ops).
 *
 * Runs nf_nat_ipv4_fn().  With CONFIG_XFRM, an accepted packet that has
 * not been xfrm-transformed yet is handed to nf_xfrm_me_harder() when the
 * source address or (for non-ICMP) the source protocol id differs between
 * this direction's tuple and the other direction's destination; a failure
 * there turns the verdict into NF_DROP_ERR().
 */
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *this_dir, *other_dir;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT ||
	    (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	this_dir = &ct->tuplehash[dir].tuple;
	other_dir = &ct->tuplehash[!dir].tuple;

	if (this_dir->src.u3.ip != other_dir->dst.u3.ip ||
	    (this_dir->dst.protonum != IPPROTO_ICMP &&
	     this_dir->src.u.all != other_dir->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif
	return ret;
}
690 
691 static unsigned int
692 nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
693 		     const struct nf_hook_state *state)
694 {
695 	const struct nf_conn *ct;
696 	enum ip_conntrack_info ctinfo;
697 	unsigned int ret;
698 	int err;
699 
700 	ret = nf_nat_ipv4_fn(priv, skb, state);
701 	if (ret != NF_ACCEPT)
702 		return ret;
703 
704 	ct = nf_ct_get(skb, &ctinfo);
705 	if (ct) {
706 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
707 
708 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
709 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
710 			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
711 			if (err < 0)
712 				ret = NF_DROP_ERR(err);
713 		}
714 #ifdef CONFIG_XFRM
715 		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
716 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
717 			 ct->tuplehash[dir].tuple.dst.u.all !=
718 			 ct->tuplehash[!dir].tuple.src.u.all) {
719 			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
720 			if (err < 0)
721 				ret = NF_DROP_ERR(err);
722 		}
723 #endif
724 	}
725 	return ret;
726 }
727 
728 const struct nf_hook_ops nf_nat_ipv4_ops[] = {
729 	/* Before packet filtering, change destination */
730 	{
731 		.hook		= nf_nat_ipv4_in,
732 		.pf		= NFPROTO_IPV4,
733 		.hooknum	= NF_INET_PRE_ROUTING,
734 		.priority	= NF_IP_PRI_NAT_DST,
735 	},
736 	/* After packet filtering, change source */
737 	{
738 		.hook		= nf_nat_ipv4_out,
739 		.pf		= NFPROTO_IPV4,
740 		.hooknum	= NF_INET_POST_ROUTING,
741 		.priority	= NF_IP_PRI_NAT_SRC,
742 	},
743 	/* Before packet filtering, change destination */
744 	{
745 		.hook		= nf_nat_ipv4_local_fn,
746 		.pf		= NFPROTO_IPV4,
747 		.hooknum	= NF_INET_LOCAL_OUT,
748 		.priority	= NF_IP_PRI_NAT_DST,
749 	},
750 	/* After packet filtering, change source */
751 	{
752 		.hook		= nf_nat_ipv4_fn,
753 		.pf		= NFPROTO_IPV4,
754 		.hooknum	= NF_INET_LOCAL_IN,
755 		.priority	= NF_IP_PRI_NAT_SRC,
756 	},
757 };
758 
759 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
760 {
761 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
762 				  ARRAY_SIZE(nf_nat_ipv4_ops));
763 }
764 EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
765 
766 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
767 {
768 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
769 }
770 EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
771 
772 #if IS_ENABLED(CONFIG_IPV6)
773 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
774 				    struct nf_conn *ct,
775 				    enum ip_conntrack_info ctinfo,
776 				    unsigned int hooknum,
777 				    unsigned int hdrlen)
778 {
779 	struct {
780 		struct icmp6hdr	icmp6;
781 		struct ipv6hdr	ip6;
782 	} *inside;
783 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
784 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
785 	struct nf_conntrack_tuple target;
786 	unsigned long statusbit;
787 
788 	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
789 
790 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
791 		return 0;
792 	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
793 		return 0;
794 
795 	inside = (void *)skb->data + hdrlen;
796 	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
797 		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
798 			return 0;
799 		if (ct->status & IPS_NAT_MASK)
800 			return 0;
801 	}
802 
803 	if (manip == NF_NAT_MANIP_SRC)
804 		statusbit = IPS_SRC_NAT;
805 	else
806 		statusbit = IPS_DST_NAT;
807 
808 	/* Invert if this is reply direction */
809 	if (dir == IP_CT_DIR_REPLY)
810 		statusbit ^= IPS_NAT_MASK;
811 
812 	if (!(ct->status & statusbit))
813 		return 1;
814 
815 	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
816 				   &ct->tuplehash[!dir].tuple, !manip))
817 		return 0;
818 
819 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
820 		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
821 
822 		inside = (void *)skb->data + hdrlen;
823 		inside->icmp6.icmp6_cksum = 0;
824 		inside->icmp6.icmp6_cksum =
825 			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
826 					skb->len - hdrlen, IPPROTO_ICMPV6,
827 					skb_checksum(skb, hdrlen,
828 						     skb->len - hdrlen, 0));
829 	}
830 
831 	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
832 	target.dst.protonum = IPPROTO_ICMPV6;
833 	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
834 		return 0;
835 
836 	return 1;
837 }
838 EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
839 
840 static unsigned int
841 nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
842 	       const struct nf_hook_state *state)
843 {
844 	struct nf_conn *ct;
845 	enum ip_conntrack_info ctinfo;
846 	__be16 frag_off;
847 	int hdrlen;
848 	u8 nexthdr;
849 
850 	ct = nf_ct_get(skb, &ctinfo);
851 	/* Can't track?  It's not due to stress, or conntrack would
852 	 * have dropped it.  Hence it's the user's responsibilty to
853 	 * packet filter it out, or implement conntrack/NAT for that
854 	 * protocol. 8) --RR
855 	 */
856 	if (!ct)
857 		return NF_ACCEPT;
858 
859 	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
860 		nexthdr = ipv6_hdr(skb)->nexthdr;
861 		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
862 					  &nexthdr, &frag_off);
863 
864 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
865 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
866 							     state->hook,
867 							     hdrlen))
868 				return NF_DROP;
869 			else
870 				return NF_ACCEPT;
871 		}
872 	}
873 
874 	return nf_nat_inet_fn(priv, skb, state);
875 }
876 
877 static unsigned int
878 nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
879 	       const struct nf_hook_state *state)
880 {
881 	unsigned int ret;
882 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
883 
884 	ret = nf_nat_ipv6_fn(priv, skb, state);
885 	if (ret != NF_DROP && ret != NF_STOLEN &&
886 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
887 		skb_dst_drop(skb);
888 
889 	return ret;
890 }
891 
/* IPv6 NAT hook for outgoing packets (see nf_nat_ipv6_ops).
 *
 * Runs nf_nat_ipv6_fn().  With CONFIG_XFRM, an accepted packet that has
 * not been xfrm-transformed yet is handed to nf_xfrm_me_harder() when the
 * source address or (for non-ICMPv6) the source protocol id differs
 * between this direction's tuple and the other direction's destination; a
 * failure there turns the verdict into NF_DROP_ERR().
 */
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
		const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
	const struct nf_conntrack_tuple *this_dir, *other_dir;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	int err;
#endif
	unsigned int ret;

	ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
	if (ret != NF_ACCEPT ||
	    (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED))
		return ret;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return ret;

	dir = CTINFO2DIR(ctinfo);
	this_dir = &ct->tuplehash[dir].tuple;
	other_dir = &ct->tuplehash[!dir].tuple;

	if (!nf_inet_addr_cmp(&this_dir->src.u3, &other_dir->dst.u3) ||
	    (this_dir->dst.protonum != IPPROTO_ICMPV6 &&
	     this_dir->src.u.all != other_dir->dst.u.all)) {
		err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
		if (err < 0)
			ret = NF_DROP_ERR(err);
	}
#endif

	return ret;
}
928 
929 static unsigned int
930 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
931 		     const struct nf_hook_state *state)
932 {
933 	const struct nf_conn *ct;
934 	enum ip_conntrack_info ctinfo;
935 	unsigned int ret;
936 	int err;
937 
938 	ret = nf_nat_ipv6_fn(priv, skb, state);
939 	if (ret != NF_ACCEPT)
940 		return ret;
941 
942 	ct = nf_ct_get(skb, &ctinfo);
943 	if (ct) {
944 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
945 
946 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
947 				      &ct->tuplehash[!dir].tuple.src.u3)) {
948 			err = nf_ip6_route_me_harder(state->net, skb);
949 			if (err < 0)
950 				ret = NF_DROP_ERR(err);
951 		}
952 #ifdef CONFIG_XFRM
953 		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
954 			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
955 			 ct->tuplehash[dir].tuple.dst.u.all !=
956 			 ct->tuplehash[!dir].tuple.src.u.all) {
957 			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
958 			if (err < 0)
959 				ret = NF_DROP_ERR(err);
960 		}
961 #endif
962 	}
963 
964 	return ret;
965 }
966 
967 const struct nf_hook_ops nf_nat_ipv6_ops[] = {
968 	/* Before packet filtering, change destination */
969 	{
970 		.hook		= nf_nat_ipv6_in,
971 		.pf		= NFPROTO_IPV6,
972 		.hooknum	= NF_INET_PRE_ROUTING,
973 		.priority	= NF_IP6_PRI_NAT_DST,
974 	},
975 	/* After packet filtering, change source */
976 	{
977 		.hook		= nf_nat_ipv6_out,
978 		.pf		= NFPROTO_IPV6,
979 		.hooknum	= NF_INET_POST_ROUTING,
980 		.priority	= NF_IP6_PRI_NAT_SRC,
981 	},
982 	/* Before packet filtering, change destination */
983 	{
984 		.hook		= nf_nat_ipv6_local_fn,
985 		.pf		= NFPROTO_IPV6,
986 		.hooknum	= NF_INET_LOCAL_OUT,
987 		.priority	= NF_IP6_PRI_NAT_DST,
988 	},
989 	/* After packet filtering, change source */
990 	{
991 		.hook		= nf_nat_ipv6_fn,
992 		.pf		= NFPROTO_IPV6,
993 		.hooknum	= NF_INET_LOCAL_IN,
994 		.priority	= NF_IP6_PRI_NAT_SRC,
995 	},
996 };
997 
998 int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
999 {
1000 	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
1001 				  ARRAY_SIZE(nf_nat_ipv6_ops));
1002 }
1003 EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
1004 
1005 void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1006 {
1007 	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1008 }
1009 EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
1010 #endif /* CONFIG_IPV6 */
1011 
1012 #if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
1013 int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
1014 {
1015 	int ret;
1016 
1017 	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
1018 		return -EINVAL;
1019 
1020 	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
1021 				 ARRAY_SIZE(nf_nat_ipv6_ops));
1022 	if (ret)
1023 		return ret;
1024 
1025 	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
1026 				 ARRAY_SIZE(nf_nat_ipv4_ops));
1027 	if (ret)
1028 		nf_nat_ipv6_unregister_fn(net, ops);
1029 
1030 	return ret;
1031 }
1032 EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
1033 
1034 void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
1035 {
1036 	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
1037 	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
1038 }
1039 EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
1040 #endif /* NFT INET NAT */
1041