xref: /linux/net/netfilter/nft_set_rbtree.c (revision ab520be8cd5d56867fc95cfbc34b90880faf1f9d)
1 /*
2  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Development of this code funded by Astaro AG (http://www.astaro.com/)
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/list.h>
15 #include <linux/rbtree.h>
16 #include <linux/netlink.h>
17 #include <linux/netfilter.h>
18 #include <linux/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_tables.h>
20 
21 static DEFINE_SPINLOCK(nft_rbtree_lock);
22 
23 struct nft_rbtree {
24 	struct rb_root		root;
25 };
26 
27 struct nft_rbtree_elem {
28 	struct rb_node		node;
29 	struct nft_set_ext	ext;
30 };
31 
32 static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
33 {
34 	return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
35 	       (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
36 }
37 
38 static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
39 			     const struct nft_rbtree_elem *interval)
40 {
41 	return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
42 }
43 
44 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
45 			      const u32 *key, const struct nft_set_ext **ext)
46 {
47 	const struct nft_rbtree *priv = nft_set_priv(set);
48 	const struct nft_rbtree_elem *rbe, *interval = NULL;
49 	u8 genmask = nft_genmask_cur(net);
50 	const struct rb_node *parent;
51 	const void *this;
52 	int d;
53 
54 	spin_lock_bh(&nft_rbtree_lock);
55 	parent = priv->root.rb_node;
56 	while (parent != NULL) {
57 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
58 
59 		this = nft_set_ext_key(&rbe->ext);
60 		d = memcmp(this, key, set->klen);
61 		if (d < 0) {
62 			parent = parent->rb_left;
63 			/* In case of adjacent ranges, we always see the high
64 			 * part of the range in first place, before the low one.
65 			 * So don't update interval if the keys are equal.
66 			 */
67 			if (interval && nft_rbtree_equal(set, this, interval))
68 				continue;
69 			interval = rbe;
70 		} else if (d > 0)
71 			parent = parent->rb_right;
72 		else {
73 			if (!nft_set_elem_active(&rbe->ext, genmask)) {
74 				parent = parent->rb_left;
75 				continue;
76 			}
77 			if (nft_rbtree_interval_end(rbe))
78 				goto out;
79 			spin_unlock_bh(&nft_rbtree_lock);
80 
81 			*ext = &rbe->ext;
82 			return true;
83 		}
84 	}
85 
86 	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
87 	    nft_set_elem_active(&interval->ext, genmask) &&
88 	    !nft_rbtree_interval_end(interval)) {
89 		spin_unlock_bh(&nft_rbtree_lock);
90 		*ext = &interval->ext;
91 		return true;
92 	}
93 out:
94 	spin_unlock_bh(&nft_rbtree_lock);
95 	return false;
96 }
97 
98 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
99 			       struct nft_rbtree_elem *new,
100 			       struct nft_set_ext **ext)
101 {
102 	struct nft_rbtree *priv = nft_set_priv(set);
103 	u8 genmask = nft_genmask_next(net);
104 	struct nft_rbtree_elem *rbe;
105 	struct rb_node *parent, **p;
106 	int d;
107 
108 	parent = NULL;
109 	p = &priv->root.rb_node;
110 	while (*p != NULL) {
111 		parent = *p;
112 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
113 		d = memcmp(nft_set_ext_key(&rbe->ext),
114 			   nft_set_ext_key(&new->ext),
115 			   set->klen);
116 		if (d < 0)
117 			p = &parent->rb_left;
118 		else if (d > 0)
119 			p = &parent->rb_right;
120 		else {
121 			if (nft_set_elem_active(&rbe->ext, genmask)) {
122 				if (nft_rbtree_interval_end(rbe) &&
123 				    !nft_rbtree_interval_end(new))
124 					p = &parent->rb_left;
125 				else if (!nft_rbtree_interval_end(rbe) &&
126 					 nft_rbtree_interval_end(new))
127 					p = &parent->rb_right;
128 				else {
129 					*ext = &rbe->ext;
130 					return -EEXIST;
131 				}
132 			}
133 		}
134 	}
135 	rb_link_node(&new->node, parent, p);
136 	rb_insert_color(&new->node, &priv->root);
137 	return 0;
138 }
139 
140 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
141 			     const struct nft_set_elem *elem,
142 			     struct nft_set_ext **ext)
143 {
144 	struct nft_rbtree_elem *rbe = elem->priv;
145 	int err;
146 
147 	spin_lock_bh(&nft_rbtree_lock);
148 	err = __nft_rbtree_insert(net, set, rbe, ext);
149 	spin_unlock_bh(&nft_rbtree_lock);
150 
151 	return err;
152 }
153 
154 static void nft_rbtree_remove(const struct nft_set *set,
155 			      const struct nft_set_elem *elem)
156 {
157 	struct nft_rbtree *priv = nft_set_priv(set);
158 	struct nft_rbtree_elem *rbe = elem->priv;
159 
160 	spin_lock_bh(&nft_rbtree_lock);
161 	rb_erase(&rbe->node, &priv->root);
162 	spin_unlock_bh(&nft_rbtree_lock);
163 }
164 
165 static void nft_rbtree_activate(const struct net *net,
166 				const struct nft_set *set,
167 				const struct nft_set_elem *elem)
168 {
169 	struct nft_rbtree_elem *rbe = elem->priv;
170 
171 	nft_set_elem_change_active(net, set, &rbe->ext);
172 }
173 
174 static bool nft_rbtree_deactivate_one(const struct net *net,
175 				      const struct nft_set *set, void *priv)
176 {
177 	struct nft_rbtree_elem *rbe = priv;
178 
179 	nft_set_elem_change_active(net, set, &rbe->ext);
180 	return true;
181 }
182 
183 static void *nft_rbtree_deactivate(const struct net *net,
184 				   const struct nft_set *set,
185 				   const struct nft_set_elem *elem)
186 {
187 	const struct nft_rbtree *priv = nft_set_priv(set);
188 	const struct rb_node *parent = priv->root.rb_node;
189 	struct nft_rbtree_elem *rbe, *this = elem->priv;
190 	u8 genmask = nft_genmask_next(net);
191 	int d;
192 
193 	while (parent != NULL) {
194 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
195 
196 		d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
197 					   set->klen);
198 		if (d < 0)
199 			parent = parent->rb_left;
200 		else if (d > 0)
201 			parent = parent->rb_right;
202 		else {
203 			if (!nft_set_elem_active(&rbe->ext, genmask)) {
204 				parent = parent->rb_left;
205 				continue;
206 			}
207 			if (nft_rbtree_interval_end(rbe) &&
208 			    !nft_rbtree_interval_end(this)) {
209 				parent = parent->rb_left;
210 				continue;
211 			} else if (!nft_rbtree_interval_end(rbe) &&
212 				   nft_rbtree_interval_end(this)) {
213 				parent = parent->rb_right;
214 				continue;
215 			}
216 			nft_rbtree_deactivate_one(net, set, rbe);
217 			return rbe;
218 		}
219 	}
220 	return NULL;
221 }
222 
223 static void nft_rbtree_walk(const struct nft_ctx *ctx,
224 			    struct nft_set *set,
225 			    struct nft_set_iter *iter)
226 {
227 	const struct nft_rbtree *priv = nft_set_priv(set);
228 	struct nft_rbtree_elem *rbe;
229 	struct nft_set_elem elem;
230 	struct rb_node *node;
231 
232 	spin_lock_bh(&nft_rbtree_lock);
233 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
234 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
235 
236 		if (iter->count < iter->skip)
237 			goto cont;
238 		if (!nft_set_elem_active(&rbe->ext, iter->genmask))
239 			goto cont;
240 
241 		elem.priv = rbe;
242 
243 		iter->err = iter->fn(ctx, set, iter, &elem);
244 		if (iter->err < 0) {
245 			spin_unlock_bh(&nft_rbtree_lock);
246 			return;
247 		}
248 cont:
249 		iter->count++;
250 	}
251 	spin_unlock_bh(&nft_rbtree_lock);
252 }
253 
254 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
255 {
256 	return sizeof(struct nft_rbtree);
257 }
258 
259 static int nft_rbtree_init(const struct nft_set *set,
260 			   const struct nft_set_desc *desc,
261 			   const struct nlattr * const nla[])
262 {
263 	struct nft_rbtree *priv = nft_set_priv(set);
264 
265 	priv->root = RB_ROOT;
266 	return 0;
267 }
268 
269 static void nft_rbtree_destroy(const struct nft_set *set)
270 {
271 	struct nft_rbtree *priv = nft_set_priv(set);
272 	struct nft_rbtree_elem *rbe;
273 	struct rb_node *node;
274 
275 	while ((node = priv->root.rb_node) != NULL) {
276 		rb_erase(node, &priv->root);
277 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
278 		nft_set_elem_destroy(set, rbe, true);
279 	}
280 }
281 
282 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
283 				struct nft_set_estimate *est)
284 {
285 	unsigned int nsize;
286 
287 	nsize = sizeof(struct nft_rbtree_elem);
288 	if (desc->size)
289 		est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
290 	else
291 		est->size = nsize;
292 
293 	est->class = NFT_SET_CLASS_O_LOG_N;
294 
295 	return true;
296 }
297 
298 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
299 	.privsize	= nft_rbtree_privsize,
300 	.elemsize	= offsetof(struct nft_rbtree_elem, ext),
301 	.estimate	= nft_rbtree_estimate,
302 	.init		= nft_rbtree_init,
303 	.destroy	= nft_rbtree_destroy,
304 	.insert		= nft_rbtree_insert,
305 	.remove		= nft_rbtree_remove,
306 	.deactivate	= nft_rbtree_deactivate,
307 	.deactivate_one	= nft_rbtree_deactivate_one,
308 	.activate	= nft_rbtree_activate,
309 	.lookup		= nft_rbtree_lookup,
310 	.walk		= nft_rbtree_walk,
311 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP,
312 	.owner		= THIS_MODULE,
313 };
314 
315 static int __init nft_rbtree_module_init(void)
316 {
317 	return nft_register_set(&nft_rbtree_ops);
318 }
319 
320 static void __exit nft_rbtree_module_exit(void)
321 {
322 	nft_unregister_set(&nft_rbtree_ops);
323 }
324 
325 module_init(nft_rbtree_module_init);
326 module_exit(nft_rbtree_module_exit);
327 
328 MODULE_LICENSE("GPL");
329 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
330 MODULE_ALIAS_NFT_SET();
331