1 /* 2 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 * 8 * Development of this code funded by Astaro AG (http://www.astaro.com/) 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/init.h> 13 #include <linux/module.h> 14 #include <linux/list.h> 15 #include <linux/log2.h> 16 #include <linux/jhash.h> 17 #include <linux/netlink.h> 18 #include <linux/workqueue.h> 19 #include <linux/rhashtable.h> 20 #include <linux/netfilter.h> 21 #include <linux/netfilter/nf_tables.h> 22 #include <net/netfilter/nf_tables.h> 23 24 /* We target a hash table size of 4, element hint is 75% of final size */ 25 #define NFT_RHASH_ELEMENT_HINT 3 26 27 struct nft_rhash { 28 struct rhashtable ht; 29 struct delayed_work gc_work; 30 }; 31 32 struct nft_rhash_elem { 33 struct rhash_head node; 34 struct nft_set_ext ext; 35 }; 36 37 struct nft_rhash_cmp_arg { 38 const struct nft_set *set; 39 const u32 *key; 40 u8 genmask; 41 }; 42 43 static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) 44 { 45 const struct nft_rhash_cmp_arg *arg = data; 46 47 return jhash(arg->key, len, seed); 48 } 49 50 static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed) 51 { 52 const struct nft_rhash_elem *he = data; 53 54 return jhash(nft_set_ext_key(&he->ext), len, seed); 55 } 56 57 static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, 58 const void *ptr) 59 { 60 const struct nft_rhash_cmp_arg *x = arg->key; 61 const struct nft_rhash_elem *he = ptr; 62 63 if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) 64 return 1; 65 if (nft_set_elem_expired(&he->ext)) 66 return 1; 67 if (!nft_set_elem_active(&he->ext, x->genmask)) 68 return 1; 69 return 0; 70 } 71 72 static const struct rhashtable_params nft_rhash_params = { 73 .head_offset = offsetof(struct nft_rhash_elem, node), 74 .hashfn = nft_rhash_key, 75 .obj_hashfn = nft_rhash_obj, 76 .obj_cmpfn = nft_rhash_cmp, 77 .automatic_shrinking = true, 78 }; 79 80 static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, 81 const u32 *key, const struct nft_set_ext **ext) 82 { 83 struct nft_rhash *priv = nft_set_priv(set); 84 const struct nft_rhash_elem *he; 85 struct nft_rhash_cmp_arg arg = { 86 .genmask = nft_genmask_cur(net), 87 .set = set, 88 .key = key, 89 }; 90 91 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); 92 if (he != NULL) 93 *ext = &he->ext; 94 95 return !!he; 96 } 97 98 static void *nft_rhash_get(const struct net *net, const struct nft_set *set, 99 const struct nft_set_elem *elem, unsigned int flags) 100 { 101 struct nft_rhash *priv = nft_set_priv(set); 102 struct nft_rhash_elem *he; 103 struct nft_rhash_cmp_arg arg = { 104 .genmask = nft_genmask_cur(net), 105 .set = set, 106 .key = elem->key.val.data, 107 }; 108 109 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); 110 if (he != NULL) 111 return he; 112 113 return ERR_PTR(-ENOENT); 114 } 115 116 static bool nft_rhash_update(struct nft_set *set, const u32 *key, 117 void *(*new)(struct nft_set *, 118 const struct nft_expr *, 119 struct nft_regs *regs), 120 const struct nft_expr *expr, 121 struct nft_regs *regs, 122 const struct nft_set_ext **ext) 123 { 124 struct nft_rhash *priv = nft_set_priv(set); 125 struct nft_rhash_elem *he, *prev; 126 struct nft_rhash_cmp_arg arg = { 127 .genmask = NFT_GENMASK_ANY, 128 .set = set, 129 .key = key, 130 }; 131 132 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); 133 if (he != NULL) 134 goto out; 135 136 he = new(set, expr, regs); 137 if (he == NULL) 138 goto err1; 139 140 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, 141 nft_rhash_params); 142 if (IS_ERR(prev)) 143 goto err2; 144 145 /* Another cpu may race to insert the element with the same key */ 146 if (prev) { 147 nft_set_elem_destroy(set, he, true); 148 he = prev; 149 } 150 151 out: 152 *ext = &he->ext; 153 return true; 154 155 err2: 156 nft_set_elem_destroy(set, he, true); 157 err1: 158 return false; 159 } 160 161 static int nft_rhash_insert(const struct net *net, const struct nft_set *set, 162 const struct nft_set_elem *elem, 163 struct nft_set_ext **ext) 164 { 165 struct nft_rhash *priv = nft_set_priv(set); 166 struct nft_rhash_elem *he = elem->priv; 167 struct nft_rhash_cmp_arg arg = { 168 .genmask = nft_genmask_next(net), 169 .set = set, 170 .key = elem->key.val.data, 171 }; 172 struct nft_rhash_elem *prev; 173 174 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, 175 nft_rhash_params); 176 if (IS_ERR(prev)) 177 return PTR_ERR(prev); 178 if (prev) { 179 *ext = &prev->ext; 180 return -EEXIST; 181 } 182 return 0; 183 } 184 185 static void nft_rhash_activate(const struct net *net, const struct nft_set *set, 186 const struct nft_set_elem *elem) 187 { 188 struct nft_rhash_elem *he = elem->priv; 189 190 nft_set_elem_change_active(net, set, &he->ext); 191 nft_set_elem_clear_busy(&he->ext); 192 } 193 194 static bool nft_rhash_flush(const struct net *net, 195 const struct nft_set *set, void *priv) 196 { 197 struct nft_rhash_elem *he = priv; 198 199 if (!nft_set_elem_mark_busy(&he->ext) || 200 !nft_is_active(net, &he->ext)) { 201 nft_set_elem_change_active(net, set, &he->ext); 202 return true; 203 } 204 return false; 205 } 206 207 static void *nft_rhash_deactivate(const struct net *net, 208 const struct nft_set *set, 209 const struct nft_set_elem *elem) 210 { 211 struct nft_rhash *priv = nft_set_priv(set); 212 struct nft_rhash_elem *he; 213 struct nft_rhash_cmp_arg arg = { 214 .genmask = nft_genmask_next(net), 215 .set = set, 216 .key = elem->key.val.data, 217 }; 218 219 rcu_read_lock(); 220 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); 221 if (he != NULL && 222 !nft_rhash_flush(net, set, he)) 223 he = NULL; 224 225 rcu_read_unlock(); 226 227 return he; 228 } 229 230 static void nft_rhash_remove(const struct net *net, 231 const struct nft_set *set, 232 const struct nft_set_elem *elem) 233 { 234 struct nft_rhash *priv = nft_set_priv(set); 235 struct nft_rhash_elem *he = elem->priv; 236 237 rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); 238 } 239 240 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, 241 struct nft_set_iter *iter) 242 { 243 struct nft_rhash *priv = nft_set_priv(set); 244 struct nft_rhash_elem *he; 245 struct rhashtable_iter hti; 246 struct nft_set_elem elem; 247 int err; 248 249 err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC); 250 iter->err = err; 251 if (err) 252 return; 253 254 rhashtable_walk_start(&hti); 255 256 while ((he = rhashtable_walk_next(&hti))) { 257 if (IS_ERR(he)) { 258 err = PTR_ERR(he); 259 if (err != -EAGAIN) { 260 iter->err = err; 261 goto out; 262 } 263 264 continue; 265 } 266 267 if (iter->count < iter->skip) 268 goto cont; 269 if (nft_set_elem_expired(&he->ext)) 270 goto cont; 271 if (!nft_set_elem_active(&he->ext, iter->genmask)) 272 goto cont; 273 274 elem.priv = he; 275 276 iter->err = iter->fn(ctx, set, iter, &elem); 277 if (iter->err < 0) 278 goto out; 279 280 cont: 281 iter->count++; 282 } 283 284 out: 285 rhashtable_walk_stop(&hti); 286 rhashtable_walk_exit(&hti); 287 } 288 289 static void nft_rhash_gc(struct work_struct *work) 290 { 291 struct nft_set *set; 292 struct nft_rhash_elem *he; 293 struct nft_rhash *priv; 294 struct nft_set_gc_batch *gcb = NULL; 295 struct rhashtable_iter hti; 296 int err; 297 298 priv = container_of(work, struct nft_rhash, gc_work.work); 299 set = nft_set_container_of(priv); 300 301 err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); 302 if (err) 303 goto schedule; 304 305 rhashtable_walk_start(&hti); 306 307 while ((he = rhashtable_walk_next(&hti))) { 308 if (IS_ERR(he)) { 309 if (PTR_ERR(he) != -EAGAIN) 310 goto out; 311 continue; 312 } 313 314 if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) { 315 struct nft_expr *expr = nft_set_ext_expr(&he->ext); 316 317 if (expr->ops->gc && 318 expr->ops->gc(read_pnet(&set->net), expr)) 319 goto gc; 320 } 321 if (!nft_set_elem_expired(&he->ext)) 322 continue; 323 gc: 324 if (nft_set_elem_mark_busy(&he->ext)) 325 continue; 326 327 gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); 328 if (gcb == NULL) 329 goto out; 330 rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); 331 atomic_dec(&set->nelems); 332 nft_set_gc_batch_add(gcb, he); 333 } 334 out: 335 rhashtable_walk_stop(&hti); 336 rhashtable_walk_exit(&hti); 337 338 nft_set_gc_batch_complete(gcb); 339 schedule: 340 queue_delayed_work(system_power_efficient_wq, &priv->gc_work, 341 nft_set_gc_interval(set)); 342 } 343 344 static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], 345 const struct nft_set_desc *desc) 346 { 347 return sizeof(struct nft_rhash); 348 } 349 350 static void nft_rhash_gc_init(const struct nft_set *set) 351 { 352 struct nft_rhash *priv = nft_set_priv(set); 353 354 queue_delayed_work(system_power_efficient_wq, &priv->gc_work, 355 nft_set_gc_interval(set)); 356 } 357 358 static int nft_rhash_init(const struct nft_set *set, 359 const struct nft_set_desc *desc, 360 const struct nlattr * const tb[]) 361 { 362 struct nft_rhash *priv = nft_set_priv(set); 363 struct rhashtable_params params = nft_rhash_params; 364 int err; 365 366 params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; 367 params.key_len = set->klen; 368 369 err = rhashtable_init(&priv->ht, ¶ms); 370 if (err < 0) 371 return err; 372 373 INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); 374 if (set->flags & NFT_SET_TIMEOUT) 375 nft_rhash_gc_init(set); 376 377 return 0; 378 } 379 380 static void nft_rhash_elem_destroy(void *ptr, void *arg) 381 { 382 nft_set_elem_destroy(arg, ptr, true); 383 } 384 385 static void nft_rhash_destroy(const struct nft_set *set) 386 { 387 struct nft_rhash *priv = nft_set_priv(set); 388 389 cancel_delayed_work_sync(&priv->gc_work); 390 rcu_barrier(); 391 rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, 392 (void *)set); 393 } 394 395 static u32 nft_hash_buckets(u32 size) 396 { 397 return roundup_pow_of_two(size * 4 / 3); 398 } 399 400 static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features, 401 struct nft_set_estimate *est) 402 { 403 est->size = ~0; 404 est->lookup = NFT_SET_CLASS_O_1; 405 est->space = NFT_SET_CLASS_O_N; 406 407 return true; 408 } 409 410 struct nft_hash { 411 u32 seed; 412 u32 buckets; 413 struct hlist_head table[]; 414 }; 415 416 struct nft_hash_elem { 417 struct hlist_node node; 418 struct nft_set_ext ext; 419 }; 420 421 static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, 422 const u32 *key, const struct nft_set_ext **ext) 423 { 424 struct nft_hash *priv = nft_set_priv(set); 425 u8 genmask = nft_genmask_cur(net); 426 const struct nft_hash_elem *he; 427 u32 hash; 428 429 hash = jhash(key, set->klen, priv->seed); 430 hash = reciprocal_scale(hash, priv->buckets); 431 hlist_for_each_entry_rcu(he, &priv->table[hash], node) { 432 if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) && 433 nft_set_elem_active(&he->ext, genmask)) { 434 *ext = &he->ext; 435 return true; 436 } 437 } 438 return false; 439 } 440 441 static void *nft_hash_get(const struct net *net, const struct nft_set *set, 442 const struct nft_set_elem *elem, unsigned int flags) 443 { 444 struct nft_hash *priv = nft_set_priv(set); 445 u8 genmask = nft_genmask_cur(net); 446 struct nft_hash_elem *he; 447 u32 hash; 448 449 hash = jhash(elem->key.val.data, set->klen, priv->seed); 450 hash = reciprocal_scale(hash, priv->buckets); 451 hlist_for_each_entry_rcu(he, &priv->table[hash], node) { 452 if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && 453 nft_set_elem_active(&he->ext, genmask)) 454 return he; 455 } 456 return ERR_PTR(-ENOENT); 457 } 458 459 /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ 460 static inline u32 nft_hash_key(const u32 *key, u32 klen) 461 { 462 if (klen == 4) 463 return *key; 464 465 return *(u16 *)key; 466 } 467 468 static bool nft_hash_lookup_fast(const struct net *net, 469 const struct nft_set *set, 470 const u32 *key, const struct nft_set_ext **ext) 471 { 472 struct nft_hash *priv = nft_set_priv(set); 473 u8 genmask = nft_genmask_cur(net); 474 const struct nft_hash_elem *he; 475 u32 hash, k1, k2; 476 477 k1 = nft_hash_key(key, set->klen); 478 hash = jhash_1word(k1, priv->seed); 479 hash = reciprocal_scale(hash, priv->buckets); 480 hlist_for_each_entry_rcu(he, &priv->table[hash], node) { 481 k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen); 482 if (k1 == k2 && 483 nft_set_elem_active(&he->ext, genmask)) { 484 *ext = &he->ext; 485 return true; 486 } 487 } 488 return false; 489 } 490 491 static int nft_hash_insert(const struct net *net, const struct nft_set *set, 492 const struct nft_set_elem *elem, 493 struct nft_set_ext **ext) 494 { 495 struct nft_hash_elem *this = elem->priv, *he; 496 struct nft_hash *priv = nft_set_priv(set); 497 u8 genmask = nft_genmask_next(net); 498 u32 hash; 499 500 hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); 501 hash = reciprocal_scale(hash, priv->buckets); 502 hlist_for_each_entry(he, &priv->table[hash], node) { 503 if (!memcmp(nft_set_ext_key(&this->ext), 504 nft_set_ext_key(&he->ext), set->klen) && 505 nft_set_elem_active(&he->ext, genmask)) { 506 *ext = &he->ext; 507 return -EEXIST; 508 } 509 } 510 hlist_add_head_rcu(&this->node, &priv->table[hash]); 511 return 0; 512 } 513 514 static void nft_hash_activate(const struct net *net, const struct nft_set *set, 515 const struct nft_set_elem *elem) 516 { 517 struct nft_hash_elem *he = elem->priv; 518 519 nft_set_elem_change_active(net, set, &he->ext); 520 } 521 522 static bool nft_hash_flush(const struct net *net, 523 const struct nft_set *set, void *priv) 524 { 525 struct nft_hash_elem *he = priv; 526 527 nft_set_elem_change_active(net, set, &he->ext); 528 return true; 529 } 530 531 static void *nft_hash_deactivate(const struct net *net, 532 const struct nft_set *set, 533 const struct nft_set_elem *elem) 534 { 535 struct nft_hash *priv = nft_set_priv(set); 536 struct nft_hash_elem *this = elem->priv, *he; 537 u8 genmask = nft_genmask_next(net); 538 u32 hash; 539 540 hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); 541 hash = reciprocal_scale(hash, priv->buckets); 542 hlist_for_each_entry(he, &priv->table[hash], node) { 543 if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, 544 set->klen) && 545 nft_set_elem_active(&he->ext, genmask)) { 546 nft_set_elem_change_active(net, set, &he->ext); 547 return he; 548 } 549 } 550 return NULL; 551 } 552 553 static void nft_hash_remove(const struct net *net, 554 const struct nft_set *set, 555 const struct nft_set_elem *elem) 556 { 557 struct nft_hash_elem *he = elem->priv; 558 559 hlist_del_rcu(&he->node); 560 } 561 562 static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, 563 struct nft_set_iter *iter) 564 { 565 struct nft_hash *priv = nft_set_priv(set); 566 struct nft_hash_elem *he; 567 struct nft_set_elem elem; 568 int i; 569 570 for (i = 0; i < priv->buckets; i++) { 571 hlist_for_each_entry_rcu(he, &priv->table[i], node) { 572 if (iter->count < iter->skip) 573 goto cont; 574 if (!nft_set_elem_active(&he->ext, iter->genmask)) 575 goto cont; 576 577 elem.priv = he; 578 579 iter->err = iter->fn(ctx, set, iter, &elem); 580 if (iter->err < 0) 581 return; 582 cont: 583 iter->count++; 584 } 585 } 586 } 587 588 static unsigned int nft_hash_privsize(const struct nlattr * const nla[], 589 const struct nft_set_desc *desc) 590 { 591 return sizeof(struct nft_hash) + 592 nft_hash_buckets(desc->size) * sizeof(struct hlist_head); 593 } 594 595 static int nft_hash_init(const struct nft_set *set, 596 const struct nft_set_desc *desc, 597 const struct nlattr * const tb[]) 598 { 599 struct nft_hash *priv = nft_set_priv(set); 600 601 priv->buckets = nft_hash_buckets(desc->size); 602 get_random_bytes(&priv->seed, sizeof(priv->seed)); 603 604 return 0; 605 } 606 607 static void nft_hash_destroy(const struct nft_set *set) 608 { 609 struct nft_hash *priv = nft_set_priv(set); 610 struct nft_hash_elem *he; 611 struct hlist_node *next; 612 int i; 613 614 for (i = 0; i < priv->buckets; i++) { 615 hlist_for_each_entry_safe(he, next, &priv->table[i], node) { 616 hlist_del_rcu(&he->node); 617 nft_set_elem_destroy(set, he, true); 618 } 619 } 620 } 621 622 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, 623 struct nft_set_estimate *est) 624 { 625 if (!desc->size) 626 return false; 627 628 if (desc->klen == 4) 629 return false; 630 631 est->size = sizeof(struct nft_hash) + 632 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + 633 desc->size * sizeof(struct nft_hash_elem); 634 est->lookup = NFT_SET_CLASS_O_1; 635 est->space = NFT_SET_CLASS_O_N; 636 637 return true; 638 } 639 640 static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features, 641 struct nft_set_estimate *est) 642 { 643 if (!desc->size) 644 return false; 645 646 if (desc->klen != 4) 647 return false; 648 649 est->size = sizeof(struct nft_hash) + 650 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + 651 desc->size * sizeof(struct nft_hash_elem); 652 est->lookup = NFT_SET_CLASS_O_1; 653 est->space = NFT_SET_CLASS_O_N; 654 655 return true; 656 } 657 658 struct nft_set_type nft_set_rhash_type __read_mostly = { 659 .owner = THIS_MODULE, 660 .features = NFT_SET_MAP | NFT_SET_OBJECT | 661 NFT_SET_TIMEOUT | NFT_SET_EVAL, 662 .ops = { 663 .privsize = nft_rhash_privsize, 664 .elemsize = offsetof(struct nft_rhash_elem, ext), 665 .estimate = nft_rhash_estimate, 666 .init = nft_rhash_init, 667 .gc_init = nft_rhash_gc_init, 668 .destroy = nft_rhash_destroy, 669 .insert = nft_rhash_insert, 670 .activate = nft_rhash_activate, 671 .deactivate = nft_rhash_deactivate, 672 .flush = nft_rhash_flush, 673 .remove = nft_rhash_remove, 674 .lookup = nft_rhash_lookup, 675 .update = nft_rhash_update, 676 .walk = nft_rhash_walk, 677 .get = nft_rhash_get, 678 }, 679 }; 680 681 struct nft_set_type nft_set_hash_type __read_mostly = { 682 .owner = THIS_MODULE, 683 .features = NFT_SET_MAP | NFT_SET_OBJECT, 684 .ops = { 685 .privsize = nft_hash_privsize, 686 .elemsize = offsetof(struct nft_hash_elem, ext), 687 .estimate = nft_hash_estimate, 688 .init = nft_hash_init, 689 .destroy = nft_hash_destroy, 690 .insert = nft_hash_insert, 691 .activate = nft_hash_activate, 692 .deactivate = nft_hash_deactivate, 693 .flush = nft_hash_flush, 694 .remove = nft_hash_remove, 695 .lookup = nft_hash_lookup, 696 .walk = nft_hash_walk, 697 .get = nft_hash_get, 698 }, 699 }; 700 701 struct nft_set_type nft_set_hash_fast_type __read_mostly = { 702 .owner = THIS_MODULE, 703 .features = NFT_SET_MAP | NFT_SET_OBJECT, 704 .ops = { 705 .privsize = nft_hash_privsize, 706 .elemsize = offsetof(struct nft_hash_elem, ext), 707 .estimate = nft_hash_fast_estimate, 708 .init = nft_hash_init, 709 .destroy = nft_hash_destroy, 710 .insert = nft_hash_insert, 711 .activate = nft_hash_activate, 712 .deactivate = nft_hash_deactivate, 713 .flush = nft_hash_flush, 714 .remove = nft_hash_remove, 715 .lookup = nft_hash_lookup_fast, 716 .walk = nft_hash_walk, 717 .get = nft_hash_get, 718 }, 719 }; 720