/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"

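/*
 * Buffer state bits that are carried over to the destination buffer when a
 * buffer or page is copied (see nilfs_copy_buffer() and nilfs_copy_page()).
 */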
#define NILFS_BUFFER_INHERENT_BITS  \
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
	 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
	 (1UL << BH_NILFS_Checked))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

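	/* block number of the first block in this page */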
	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, nilfs_mark_buffer_dirty() sets the buffer dirty
 * flag directly and marks the page dirty with __set_page_dirty_nobuffers(),
 * mimicking the old-style mark_buffer_dirty().
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
		__set_page_dirty_nobuffers(bh->b_page);
}

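/**
 * nilfs_grab_buffer - get or create a buffer for a block in a page cache
 * @inode: inode that provides the block size (i_blkbits)
 * @mapping: page cache in which the block resides
 * @blkoff: block offset within @mapping
 * @b_state: initial buffer state bits used if buffers have to be created
 *
 * Returns the buffer head for the block, with its page left locked and an
 * extra reference held on the buffer, or NULL on failure.
 */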
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page;
	struct buffer_head *bh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_buffer(bh);
	clear_buffer_nilfs_volatile(bh);
	clear_buffer_nilfs_checked(bh);
	clear_buffer_nilfs_redirected(bh);
	clear_buffer_dirty(bh);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	clear_buffer_uptodate(bh);
	clear_buffer_mapped(bh);
	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage, KM_USER0);
	kaddr1 = kmap_atomic(dpage, KM_USER1);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1, KM_USER1);
	kunmap_atomic(kaddr0, KM_USER0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

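	/*
	 * Set the uptodate/mapped flags of the destination page only if the
	 * corresponding bit is set on every buffer of the page; dbh already
	 * carries the state copied from sbh.
	 */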
	bh = dbh;
	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & (1UL << BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & (1UL << BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check whether a page has dirty buffers
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

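/*
 * nilfs_page_bug - dump information about a page and its buffer heads to
 * the kernel log.  Called from the NILFS_PAGE_BUG() debugging macro.
 */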
void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino = 0;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	if (m) {
		struct inode *inode = NILFS_AS_I(m);
		if (inode != NULL)
			ino = inode->i_ino;
	}
	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device to which the buffer heads are assigned
 * @size: buffer size in bytes
 * @state: initial buffer state bits (BH_NILFS_Allocated is set in addition)
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
				      unsigned long state)
{
	struct buffer_head *bh, *head, *tail;
	struct page *page;

	page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
	if (unlikely(!page))
		return NULL;

	lock_page(page);
	head = alloc_page_buffers(page, size, 0);
	if (unlikely(!head)) {
		unlock_page(page);
		__free_page(page);
		return NULL;
	}

	bh = head;
	do {
		bh->b_state = (1UL << BH_NILFS_Allocated) | state;
		tail = bh;
		bh->b_bdev = bdev;
		bh = bh->b_this_page;
	} while (bh);

	tail->b_this_page = head;
	attach_page_buffers(page, head);

	return page;
}

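/*
 * nilfs_free_private_page - free a private page together with its buffer
 * heads.  The page must be locked and must not belong to any address space.
 */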
void nilfs_free_private_page(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping);

	if (page_has_buffers(page) && !try_to_free_buffers(page))
		NILFS_PAGE_BUG(page, "failed to free page");

	unlock_page(page);
	__free_page(page);
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag indicating whether to copy the dirty states of the
 *              page's buffer heads
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The pages must not be under I/O, and
 * both @src and @dst must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= (1UL << BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

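	/* unlock the buffers locked in the copy loop above */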
	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

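/**
 * nilfs_copy_dirty_pages - copy dirty pages between two page caches
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Every page tagged dirty in @smap is copied, together with its buffers and
 * flags, to a page with the same index in @dmap, which is then marked dirty
 * as well.
 *
 * Return Value: zero on success, or -ENOMEM if a destination page cannot be
 * allocated.
 */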
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec, 0);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
				PAGEVEC_SIZE))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		page_cache_release(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec, 0);
repeat:
	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
	if (!n)
		return;
	index = pvec.pages[n - 1]->index + 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* override existing page on the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			page_cache_release(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				page_cache_release(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}

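/**
 * nilfs_clear_dirty_pages - discard dirty pages in an address space
 * @mapping: address space whose dirty pages are discarded
 *
 * Clears the dirty, uptodate and mapped state of every dirty page in
 * @mapping and of all its buffers, dropping the pending changes.
 */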
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			lock_page(page);
			ClearPageUptodate(page);
			ClearPageMappedToDisk(page);
			bh = head = page_buffers(page);
			do {
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				clear_buffer_nilfs_volatile(bh);
				clear_buffer_nilfs_checked(bh);
				clear_buffer_nilfs_redirected(bh);
				clear_buffer_uptodate(bh);
				clear_buffer_mapped(bh);
				unlock_buffer(bh);
				bh = bh->b_this_page;
			} while (bh != head);

			__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

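/**
 * nilfs_page_count_clean_buffers - count non-dirty buffers in a byte range
 * @page: page whose buffers are examined
 * @from: start offset of the range within the page (in bytes)
 * @to: end offset of the range within the page (in bytes)
 *
 * Returns the number of buffer heads on @page that overlap the byte range
 * [@from, @to) and do not have their dirty flag set.
 */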
unsigned nilfs_page_count_clean_buffers(struct page *page,
					unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

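/*
 * Fully (re)initialize an address_space object, including the fields that
 * are normally set up when an inode is allocated.  NILFS uses this for page
 * caches, such as B-tree node caches and shadow caches, whose mapping->host
 * is not a valid inode pointer.
 */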
void nilfs_mapping_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
	spin_lock_init(&mapping->tree_lock);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);

	spin_lock_init(&mapping->i_mmap_lock);
	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}

void nilfs_mapping_init(struct address_space *mapping,
			struct backing_dev_info *bdi,
			const struct address_space_operations *aops)
{
	mapping->host = NULL;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = bdi;
	mapping->a_ops = aops;
}

/*
 * NILFS2 needs __nilfs_clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
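			/*
			 * Clear the dirty tag in the radix tree by hand;
			 * clear_page_dirty_for_io() only clears the page
			 * flag and does not touch the tag.
			 */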
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches for an extent of buffers marked "delayed" that
 * starts at a block offset equal to or larger than @start_blk.  If such
 * an extent is found, the start offset is stored in @blkoff and its
 * length in blocks is returned.  Otherwise, zero is returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
					    sector_t start_blk,
					    sector_t *blkoff)
{
	unsigned int i;
	pgoff_t index;
	unsigned int nblocks_in_page;
	unsigned long length = 0;
	sector_t b;
	struct pagevec pvec;
	struct page *page;

	if (inode->i_mapping->nrpages == 0)
		return 0;

	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	pagevec_init(&pvec, 0);

repeat:
	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
					pvec.pages);
	if (pvec.nr == 0)
		return length;

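	/* a gap before the next expected page ends the extent found so far */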
	if (length > 0 && pvec.pages[0]->index > index)
		goto out;

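	/* block number of the first block in the first page of this batch */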
	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	i = 0;
	do {
		page = pvec.pages[i];

		lock_page(page);
		if (page_has_buffers(page)) {
			struct buffer_head *bh, *head;

			bh = head = page_buffers(page);
			do {
				if (b < start_blk)
					continue;
				if (buffer_delay(bh)) {
					if (length == 0)
						*blkoff = b;
					length++;
				} else if (length > 0) {
					goto out_locked;
				}
			} while (++b, bh = bh->b_this_page, bh != head);
		} else {
			if (length > 0)
				goto out_locked;

			b += nblocks_in_page;
		}
		unlock_page(page);

	} while (++i < pagevec_count(&pvec));

	index = page->index + 1;
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;

out_locked:
	unlock_page(page);
out:
	pagevec_release(&pvec);
	return length;
}
633