xref: /linux/net/netfilter/ipvs/ip_vs_nfct.c (revision 975ef7ff81bb000af6e6c8e63e81f89f3468dcf7)
1 /*
2  * ip_vs_nfct.c:	Netfilter connection tracking support for IPVS
3  *
4  * Portions Copyright (C) 2001-2002
5  * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6  *
7  * Portions Copyright (C) 2003-2010
8  * Julian Anastasov
9  *
10  *
11  * This code is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, see <http://www.gnu.org/licenses/>.
23  *
24  *
25  * Authors:
26  * Ben North <ben@redfrontdoor.org>
27  * Julian Anastasov <ja@ssi.bg>		Reorganize and sync with latest kernels
28  * Hannes Eder <heder@google.com>	Extend NFCT support for FTP, ipvs match
29  *
30  *
31  * Current status:
32  *
33  * - provide conntrack confirmation for new and related connections, by
34  * this way we can see their proper conntrack state in all hooks
35  * - support for all forwarding methods, not only NAT
36  * - FTP support (NAT), ability to support other NAT apps with expectations
37  * - to correctly create expectations for related NAT connections the proper
38  * NF conntrack support must be already installed, eg. ip_vs_ftp requires
39  * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
40  * NAT rules are needed)
41  * - alter reply for NAT when forwarding packet in original direction:
42  * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
43  * when RELATED conntrack is created from real server (Active FTP DATA)
44  * - if iptables_nat is not loaded the Passive FTP will not work (the
45  * PASV response can not be NAT-ed) but Active FTP should work
46  *
47  */
48 
49 #define KMSG_COMPONENT "IPVS"
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
51 
52 #include <linux/module.h>
53 #include <linux/types.h>
54 #include <linux/kernel.h>
55 #include <linux/errno.h>
56 #include <linux/compiler.h>
57 #include <linux/vmalloc.h>
58 #include <linux/skbuff.h>
59 #include <net/ip.h>
60 #include <linux/netfilter.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ip_vs.h>
63 #include <net/netfilter/nf_conntrack_core.h>
64 #include <net/netfilter/nf_conntrack_expect.h>
65 #include <net/netfilter/nf_conntrack_seqadj.h>
66 #include <net/netfilter/nf_conntrack_helper.h>
67 #include <net/netfilter/nf_conntrack_zones.h>
68 
69 
/*
 * Debug helpers for printing a struct nf_conntrack_tuple.
 * FMT_TUPLE is the format ("src addr:port->dst addr:port/protonum") and
 * ARG_TUPLE(tpl) expands to the five matching arguments.  Ports are taken
 * from the generic "all" member and converted with ntohs(); both addresses
 * are formatted using the source l3num as address family.
 * In this file they are only used as arguments of IP_VS_DBG_BUF().
 */
#define FMT_TUPLE	"%s:%u->%s:%u/%u"
#define ARG_TUPLE(tpl)							\
	IP_VS_DBG_ADDR((tpl)->src.l3num, &(tpl)->src.u3),		\
	ntohs((tpl)->src.u.all),					\
	IP_VS_DBG_ADDR((tpl)->src.l3num, &(tpl)->dst.u3),		\
	ntohs((tpl)->dst.u.all),					\
	(tpl)->dst.protonum
76 
/*
 * Debug helpers for printing a struct ip_vs_conn.
 * FMT_CONN is the format ("caddr:cport->vaddr:vport->daddr:dport/
 * protocol:state") and ARG_CONN(conn) expands to the eight matching
 * arguments.  Client and virtual addresses are formatted with conn->af,
 * the destination address with conn->daf; ports go through ntohs().
 * In this file they are only used as arguments of IP_VS_DBG_BUF().
 */
#define FMT_CONN	"%s:%u->%s:%u->%s:%u/%u:%u"
#define ARG_CONN(conn)							\
	IP_VS_DBG_ADDR((conn)->af, &((conn)->caddr)),			\
	ntohs((conn)->cport),						\
	IP_VS_DBG_ADDR((conn)->af, &((conn)->vaddr)),			\
	ntohs((conn)->vport),						\
	IP_VS_DBG_ADDR((conn)->daf, &((conn)->daddr)),			\
	ntohs((conn)->dport),						\
	(conn)->protocol, (conn)->state
85 
86 void
87 ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
88 {
89 	enum ip_conntrack_info ctinfo;
90 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
91 	struct nf_conntrack_tuple new_tuple;
92 
93 	if (ct == NULL || nf_ct_is_confirmed(ct) ||
94 	    nf_ct_is_dying(ct))
95 		return;
96 
97 	/* Never alter conntrack for non-NAT conns */
98 	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
99 		return;
100 
101 	/* Never alter conntrack for OPS conns (no reply is expected) */
102 	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
103 		return;
104 
105 	/* Alter reply only in original direction */
106 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
107 		return;
108 
109 	/* Applications may adjust TCP seqs */
110 	if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP &&
111 	    !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct))
112 		return;
113 
114 	/*
115 	 * The connection is not yet in the hashtable, so we update it.
116 	 * CIP->VIP will remain the same, so leave the tuple in
117 	 * IP_CT_DIR_ORIGINAL untouched.  When the reply comes back from the
118 	 * real-server we will see RIP->DIP.
119 	 */
120 	new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
121 	/*
122 	 * This will also take care of UDP and other protocols.
123 	 */
124 	if (outin) {
125 		new_tuple.src.u3 = cp->daddr;
126 		if (new_tuple.dst.protonum != IPPROTO_ICMP &&
127 		    new_tuple.dst.protonum != IPPROTO_ICMPV6)
128 			new_tuple.src.u.tcp.port = cp->dport;
129 	} else {
130 		new_tuple.dst.u3 = cp->vaddr;
131 		if (new_tuple.dst.protonum != IPPROTO_ICMP &&
132 		    new_tuple.dst.protonum != IPPROTO_ICMPV6)
133 			new_tuple.dst.u.tcp.port = cp->vport;
134 	}
135 	IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
136 		      "ctinfo=%d, old reply=" FMT_TUPLE "\n",
137 		      __func__, ct, ct->status, ctinfo,
138 		      ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple));
139 	IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
140 		      "ctinfo=%d, new reply=" FMT_TUPLE "\n",
141 		      __func__, ct, ct->status, ctinfo,
142 		      ARG_TUPLE(&new_tuple));
143 	nf_conntrack_alter_reply(ct, &new_tuple);
144 	IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n",
145 		      __func__, ct, ARG_CONN(cp));
146 }
147 
/*
 * Confirm the conntrack attached to @skb.
 * Thin wrapper: the result of nf_conntrack_confirm() is handed back to
 * the caller unchanged.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
{
	int ret = nf_conntrack_confirm(skb);

	return ret;
}
152 
153 /*
154  * Called from init_conntrack() as expectfn handler.
155  */
156 static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
157 	struct nf_conntrack_expect *exp)
158 {
159 	struct nf_conntrack_tuple *orig, new_reply;
160 	struct ip_vs_conn *cp;
161 	struct ip_vs_conn_param p;
162 	struct net *net = nf_ct_net(ct);
163 
164 	/*
165 	 * We assume that no NF locks are held before this callback.
166 	 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
167 	 * expectations even if they use wildcard values, now we provide the
168 	 * actual values from the newly created original conntrack direction.
169 	 * The conntrack is confirmed when packet reaches IPVS hooks.
170 	 */
171 
172 	/* RS->CLIENT */
173 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
174 	ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
175 			      &orig->src.u3, orig->src.u.tcp.port,
176 			      &orig->dst.u3, orig->dst.u.tcp.port, &p);
177 	cp = ip_vs_conn_out_get(&p);
178 	if (cp) {
179 		/* Change reply CLIENT->RS to CLIENT->VS */
180 		IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp="
181 			      FMT_CONN "\n",
182 			      __func__, ct, ct->status, ARG_CONN(cp));
183 		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
184 		IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
185 			      FMT_TUPLE "\n",
186 			      __func__, ct, ARG_TUPLE(&new_reply));
187 		new_reply.dst.u3 = cp->vaddr;
188 		new_reply.dst.u.tcp.port = cp->vport;
189 		goto alter;
190 	}
191 
192 	/* CLIENT->VS */
193 	cp = ip_vs_conn_in_get(&p);
194 	if (cp) {
195 		/* Change reply VS->CLIENT to RS->CLIENT */
196 		IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp="
197 			      FMT_CONN "\n",
198 			      __func__, ct, ct->status, ARG_CONN(cp));
199 		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
200 		IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
201 			      FMT_TUPLE "\n",
202 			      __func__, ct, ARG_TUPLE(&new_reply));
203 		new_reply.src.u3 = cp->daddr;
204 		new_reply.src.u.tcp.port = cp->dport;
205 		goto alter;
206 	}
207 
208 	IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
209 		      " - unknown expect\n",
210 		      __func__, ct, ct->status, ARG_TUPLE(orig));
211 	return;
212 
213 alter:
214 	/* Never alter conntrack for non-NAT conns */
215 	if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
216 		nf_conntrack_alter_reply(ct, &new_reply);
217 	ip_vs_conn_put(cp);
218 	return;
219 }
220 
221 /*
222  * Create NF conntrack expectation with wildcard (optional) source port.
223  * Then the default callback function will alter the reply and will confirm
224  * the conntrack entry when the first packet comes.
225  * Use port 0 to expect connection from any port.
226  */
227 void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
228 			       struct ip_vs_conn *cp, u_int8_t proto,
229 			       const __be16 port, int from_rs)
230 {
231 	struct nf_conntrack_expect *exp;
232 
233 	if (ct == NULL)
234 		return;
235 
236 	exp = nf_ct_expect_alloc(ct);
237 	if (!exp)
238 		return;
239 
240 	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
241 			from_rs ? &cp->daddr : &cp->caddr,
242 			from_rs ? &cp->caddr : &cp->vaddr,
243 			proto, port ? &port : NULL,
244 			from_rs ? &cp->cport : &cp->vport);
245 
246 	exp->expectfn = ip_vs_nfct_expect_callback;
247 
248 	IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
249 		      __func__, ct, ARG_TUPLE(&exp->tuple));
250 	nf_ct_expect_related(exp);
251 	nf_ct_expect_put(exp);
252 }
253 EXPORT_SYMBOL(ip_vs_nfct_expect_related);
254 
255 /*
256  * Our connection was terminated, try to drop the conntrack immediately
257  */
258 void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
259 {
260 	struct nf_conntrack_tuple_hash *h;
261 	struct nf_conn *ct;
262 	struct nf_conntrack_tuple tuple;
263 
264 	if (!cp->cport)
265 		return;
266 
267 	tuple = (struct nf_conntrack_tuple) {
268 		.dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
269 	tuple.src.u3 = cp->caddr;
270 	tuple.src.u.all = cp->cport;
271 	tuple.src.l3num = cp->af;
272 	tuple.dst.u3 = cp->vaddr;
273 	tuple.dst.u.all = cp->vport;
274 
275 	IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n",
276 		      __func__, ARG_CONN(cp));
277 
278 	h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
279 	if (h) {
280 		ct = nf_ct_tuplehash_to_ctrack(h);
281 		if (nf_ct_kill(ct)) {
282 			IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple="
283 				      FMT_TUPLE "\n",
284 				      __func__, ct, ARG_TUPLE(&tuple));
285 		} else {
286 			IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple="
287 				      FMT_TUPLE "\n",
288 				      __func__, ct, ARG_TUPLE(&tuple));
289 		}
290 		nf_ct_put(ct);
291 	} else {
292 		IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
293 			      __func__, ARG_TUPLE(&tuple));
294 	}
295 }
296 
297