xref: /linux/net/netfilter/nf_queue.c (revision 4413e16d9d21673bb5048a2e542f1aaa00015c2e)
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
#include <net/netfilter/nf_queue.h>
#include <net/dst.h>

#include "nf_internals.h"

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * a long-term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
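/*
 * A minimal handler sketch (illustrative only; the names below are
 * hypothetical, while nfnetlink_queue registers the real handler): the
 * outfn() stores the entry until a verdict arrives, then hands it back
 * via nf_reinject().
 *
 *	static int example_outfn(struct nf_queue_entry *entry,
 *				 unsigned int queuenum)
 *	{
 *		... stash entry, call nf_reinject(entry, verdict) later ...
 *		return 0;
 *	}
 *
 *	static const struct nf_queue_handler example_qh = {
 *		.name	= "example",
 *		.outfn	= example_outfn,
 *	};
 *
 *	err = nf_register_queue_handler(NFPROTO_IPV4, &example_qh);
 */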
static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;

static DEFINE_MUTEX(queue_handler_mutex);

/* Return -EBUSY when somebody else is registered, -EEXIST if the
 * same handler is already registered, and 0 on success. */
int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
	int ret;
	const struct nf_queue_handler *old;

	if (pf >= ARRAY_SIZE(queue_handler))
		return -EINVAL;

	mutex_lock(&queue_handler_mutex);
	old = rcu_dereference_protected(queue_handler[pf],
					lockdep_is_held(&queue_handler_mutex));
	if (old == qh)
		ret = -EEXIST;
	else if (old)
		ret = -EBUSY;
	else {
		rcu_assign_pointer(queue_handler[pf], qh);
		ret = 0;
	}
	mutex_unlock(&queue_handler_mutex);

	return ret;
}
EXPORT_SYMBOL(nf_register_queue_handler);

/* The caller must flush their queue before this */
int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
{
	const struct nf_queue_handler *old;

	if (pf >= ARRAY_SIZE(queue_handler))
		return -EINVAL;

	mutex_lock(&queue_handler_mutex);
	old = rcu_dereference_protected(queue_handler[pf],
					lockdep_is_held(&queue_handler_mutex));
	if (old && old != qh) {
		mutex_unlock(&queue_handler_mutex);
		return -EINVAL;
	}

	RCU_INIT_POINTER(queue_handler[pf], NULL);
	mutex_unlock(&queue_handler_mutex);

	synchronize_rcu();

	return 0;
}
EXPORT_SYMBOL(nf_unregister_queue_handler);

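/* Remove qh from every protocol family slot that still points at it.
 * Typically called when a handler module is unloaded. */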
void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
{
	u_int8_t pf;

	mutex_lock(&queue_handler_mutex);
	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
		if (rcu_dereference_protected(
				queue_handler[pf],
				lockdep_is_held(&queue_handler_mutex)
				) == qh)
			RCU_INIT_POINTER(queue_handler[pf], NULL);
	}
	mutex_unlock(&queue_handler_mutex);

	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);

static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
	/* Release those devices we held, or Alexey will kill me. */
	if (entry->indev)
		dev_put(entry->indev);
	if (entry->outdev)
		dev_put(entry->outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
	if (entry->skb->nf_bridge) {
		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;

		if (nf_bridge->physindev)
			dev_put(nf_bridge->physindev);
		if (nf_bridge->physoutdev)
			dev_put(nf_bridge->physoutdev);
	}
#endif
	/* Drop reference to owner of hook which queued us. */
	module_put(entry->elem->owner);
}

/*
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 */
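/*
 * Returns -ESRCH if no handler is registered for this pf, -ENOENT if
 * there is no afinfo for it, -ENOMEM if the entry cannot be allocated,
 * and -ECANCELED if the hook's owner module is being unloaded; otherwise
 * the status returned by the handler's outfn() is passed through.
 */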
static int __nf_queue(struct sk_buff *skb,
		      struct list_head *elem,
		      u_int8_t pf, unsigned int hook,
		      struct net_device *indev,
		      struct net_device *outdev,
		      int (*okfn)(struct sk_buff *),
		      unsigned int queuenum)
{
	int status = -ENOENT;
	struct nf_queue_entry *entry = NULL;
#ifdef CONFIG_BRIDGE_NETFILTER
	struct net_device *physindev;
	struct net_device *physoutdev;
#endif
	const struct nf_afinfo *afinfo;
	const struct nf_queue_handler *qh;

	/* QUEUE == DROP if no one is waiting, to be safe. */
	rcu_read_lock();

	qh = rcu_dereference(queue_handler[pf]);
	if (!qh) {
		status = -ESRCH;
		goto err_unlock;
	}

	afinfo = nf_get_afinfo(pf);
	if (!afinfo)
		goto err_unlock;

	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
	if (!entry) {
		status = -ENOMEM;
		goto err_unlock;
	}

	*entry = (struct nf_queue_entry) {
		.skb	= skb,
		.elem	= list_entry(elem, struct nf_hook_ops, list),
		.pf	= pf,
		.hook	= hook,
		.indev	= indev,
		.outdev	= outdev,
		.okfn	= okfn,
	};

	/* If it's going away, ignore hook. */
	if (!try_module_get(entry->elem->owner)) {
		status = -ECANCELED;
		goto err_unlock;
	}
	/* Bump dev refs so they don't vanish while packet is out */
	if (indev)
		dev_hold(indev);
	if (outdev)
		dev_hold(outdev);
#ifdef CONFIG_BRIDGE_NETFILTER
	if (skb->nf_bridge) {
		physindev = skb->nf_bridge->physindev;
		if (physindev)
			dev_hold(physindev);
		physoutdev = skb->nf_bridge->physoutdev;
		if (physoutdev)
			dev_hold(physoutdev);
	}
#endif
	skb_dst_force(skb);
	afinfo->saveroute(skb, entry);
	status = qh->outfn(entry, queuenum);

	rcu_read_unlock();

	if (status < 0) {
		nf_queue_entry_release_refs(entry);
		goto err;
	}

	return 0;

err_unlock:
	rcu_read_unlock();
err:
	kfree(entry);
	return status;
}

#ifdef CONFIG_BRIDGE_NETFILTER
/* When called from bridge netfilter, skb->data must point to the MAC header
 * before calling skb_gso_segment().  Otherwise the original MAC header is
 * lost and the segmented skbs will be sent to the wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_push(skb, skb->network_header - skb->mac_header);
}

static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
	if (skb->nf_bridge)
		__skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif

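/*
 * GSO packets cannot be queued as one unit: nf_queue() segments them and
 * queues each segment separately.  The original skb is freed once at
 * least one segment has been queued successfully.
 */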
int nf_queue(struct sk_buff *skb,
	     struct list_head *elem,
	     u_int8_t pf, unsigned int hook,
	     struct net_device *indev,
	     struct net_device *outdev,
	     int (*okfn)(struct sk_buff *),
	     unsigned int queuenum)
{
	struct sk_buff *segs;
	int err = -EINVAL;
	unsigned int queued;

	if (!skb_is_gso(skb))
		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
				  queuenum);

	switch (pf) {
	case NFPROTO_IPV4:
		skb->protocol = htons(ETH_P_IP);
		break;
	case NFPROTO_IPV6:
		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	nf_bridge_adjust_skb_data(skb);
	segs = skb_gso_segment(skb, 0);
	/* Does not use PTR_ERR to limit the number of error codes that can be
	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to
	 * mean 'ignore this hook'.
	 */
	if (IS_ERR(segs))
		goto out_err;
	queued = 0;
	err = 0;
	do {
		struct sk_buff *nskb = segs->next;

		segs->next = NULL;
		if (err == 0) {
			nf_bridge_adjust_segmented_data(segs);
			err = __nf_queue(segs, elem, pf, hook, indev,
					 outdev, okfn, queuenum);
		}
		if (err == 0)
			queued++;
		else
			kfree_skb(segs);
		segs = nskb;
	} while (segs);

	if (queued) {
		kfree_skb(skb);
		return 0;
	}
  out_err:
	nf_bridge_adjust_segmented_data(skb);
	return err;
}

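/*
 * Hand a queued packet back to the stack with a verdict.  NF_REPEAT
 * re-runs the hook that queued the packet, NF_ACCEPT reroutes and resumes
 * traversal of the remaining hooks, NF_QUEUE queues the packet again
 * (continuing traversal instead if the handler vanished and the
 * queue-bypass flag is set), NF_STOLEN leaves the skb to its new owner,
 * and any other verdict frees it.  The entry itself is always freed.
 */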
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
	struct sk_buff *skb = entry->skb;
	struct list_head *elem = &entry->elem->list;
	const struct nf_afinfo *afinfo;
	int err;

	rcu_read_lock();

	nf_queue_entry_release_refs(entry);

	/* Continue traversal iff userspace said ok... */
	if (verdict == NF_REPEAT) {
		elem = elem->prev;
		verdict = NF_ACCEPT;
	}

	if (verdict == NF_ACCEPT) {
		afinfo = nf_get_afinfo(entry->pf);
		if (!afinfo || afinfo->reroute(skb, entry) < 0)
			verdict = NF_DROP;
	}

	if (verdict == NF_ACCEPT) {
	next_hook:
		verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
				     skb, entry->hook,
				     entry->indev, entry->outdev, &elem,
				     entry->okfn, INT_MIN);
	}

	switch (verdict & NF_VERDICT_MASK) {
	case NF_ACCEPT:
	case NF_STOP:
		local_bh_disable();
		entry->okfn(skb);
		local_bh_enable();
		break;
	case NF_QUEUE:
		err = __nf_queue(skb, elem, entry->pf, entry->hook,
				 entry->indev, entry->outdev, entry->okfn,
				 verdict >> NF_VERDICT_QBITS);
		if (err < 0) {
			if (err == -ECANCELED)
				goto next_hook;
			if (err == -ESRCH &&
			    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
				goto next_hook;
			kfree_skb(skb);
		}
		break;
	case NF_STOLEN:
		break;
	default:
		kfree_skb(skb);
	}
	rcu_read_unlock();
	kfree(entry);
}
EXPORT_SYMBOL(nf_reinject);

#ifdef CONFIG_PROC_FS
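/* Reading /proc/net/netfilter/nf_queue prints one line per protocol
 * family; a sketch of the output, assuming nfnetlink_queue (whose
 * handler is named "nf_queue") is bound to AF_INET:
 *
 *	 0 NONE
 *	 1 NONE
 *	 2 nf_queue
 *	 3 NONE
 *	...
 */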
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos >= ARRAY_SIZE(queue_handler))
		return NULL;

	return pos;
}

static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;

	if (*pos >= ARRAY_SIZE(queue_handler))
		return NULL;

	return pos;
}

static void seq_stop(struct seq_file *s, void *v)
{

}

static int seq_show(struct seq_file *s, void *v)
{
	int ret;
	loff_t *pos = v;
	const struct nf_queue_handler *qh;

	rcu_read_lock();
	qh = rcu_dereference(queue_handler[*pos]);
	if (!qh)
		ret = seq_printf(s, "%2lld NONE\n", *pos);
	else
		ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
	rcu_read_unlock();

	return ret;
}

static const struct seq_operations nfqueue_seq_ops = {
	.start	= seq_start,
	.next	= seq_next,
	.stop	= seq_stop,
	.show	= seq_show,
};

static int nfqueue_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &nfqueue_seq_ops);
}

static const struct file_operations nfqueue_file_ops = {
	.owner	 = THIS_MODULE,
	.open	 = nfqueue_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
#endif /* PROC_FS */
int __init netfilter_queue_init(void)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create("nf_queue", S_IRUGO,
			 proc_net_netfilter, &nfqueue_file_ops))
		return -1;
#endif
	return 0;
}