xref: /linux/fs/overlayfs/copy_up.c (revision 19d0070a2792181f79df01277fe00b83b9f7eda7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *
4  * Copyright (C) 2011 Novell Inc.
5  */
6 
7 #include <linux/module.h>
8 #include <linux/fs.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/splice.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/uaccess.h>
15 #include <linux/sched/signal.h>
16 #include <linux/cred.h>
17 #include <linux/namei.h>
18 #include <linux/fdtable.h>
19 #include <linux/ratelimit.h>
20 #include <linux/exportfs.h>
21 #include "overlayfs.h"
22 
/*
 * Largest number of bytes handed to a single do_splice_direct() call by
 * ovl_copy_up_data(): 1 << 20 bytes, i.e. 1 MiB.
 */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
24 
/*
 * Setter for the obsolete "check_copy_up" module parameter.
 *
 * Neither @buf nor @param is looked at: the only effect is a warning in the
 * kernel log.  Always returns 0.
 */
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
	const int ret = 0;

	pr_warn("\"check_copy_up\" module option is obsolete\n");

	return ret;
}
30 
/*
 * Getter for the obsolete "check_copy_up" module parameter.
 *
 * Writes the fixed string "N\n" into @buf and returns the number of
 * characters written, as reported by sprintf().  @param is unused.
 */
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
	int written;

	written = sprintf(buf, "N\n");

	return written;
}
35 
/*
 * Register "check_copy_up" as a module parameter (mode 0644) backed by
 * ovl_ccup_set()/ovl_ccup_get(); per its description it is obsolete and
 * does nothing.
 */
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
38 
39 static bool ovl_must_copy_xattr(const char *name)
40 {
41 	return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
42 	       !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
43 	       !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
44 }
45 
46 int ovl_copy_xattr(struct dentry *old, struct dentry *new)
47 {
48 	ssize_t list_size, size, value_size = 0;
49 	char *buf, *name, *value = NULL;
50 	int error = 0;
51 	size_t slen;
52 
53 	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
54 	    !(new->d_inode->i_opflags & IOP_XATTR))
55 		return 0;
56 
57 	list_size = vfs_listxattr(old, NULL, 0);
58 	if (list_size <= 0) {
59 		if (list_size == -EOPNOTSUPP)
60 			return 0;
61 		return list_size;
62 	}
63 
64 	buf = kzalloc(list_size, GFP_KERNEL);
65 	if (!buf)
66 		return -ENOMEM;
67 
68 	list_size = vfs_listxattr(old, buf, list_size);
69 	if (list_size <= 0) {
70 		error = list_size;
71 		goto out;
72 	}
73 
74 	for (name = buf; list_size; name += slen) {
75 		slen = strnlen(name, list_size) + 1;
76 
77 		/* underlying fs providing us with an broken xattr list? */
78 		if (WARN_ON(slen > list_size)) {
79 			error = -EIO;
80 			break;
81 		}
82 		list_size -= slen;
83 
84 		if (ovl_is_private_xattr(name))
85 			continue;
86 retry:
87 		size = vfs_getxattr(old, name, value, value_size);
88 		if (size == -ERANGE)
89 			size = vfs_getxattr(old, name, NULL, 0);
90 
91 		if (size < 0) {
92 			error = size;
93 			break;
94 		}
95 
96 		if (size > value_size) {
97 			void *new;
98 
99 			new = krealloc(value, size, GFP_KERNEL);
100 			if (!new) {
101 				error = -ENOMEM;
102 				break;
103 			}
104 			value = new;
105 			value_size = size;
106 			goto retry;
107 		}
108 
109 		error = security_inode_copy_up_xattr(name);
110 		if (error < 0 && error != -EOPNOTSUPP)
111 			break;
112 		if (error == 1) {
113 			error = 0;
114 			continue; /* Discard */
115 		}
116 		error = vfs_setxattr(new, name, value, size, 0);
117 		if (error) {
118 			if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
119 				break;
120 
121 			/* Ignore failure to copy unknown xattrs */
122 			error = 0;
123 		}
124 	}
125 	kfree(value);
126 out:
127 	kfree(buf);
128 	return error;
129 }
130 
131 static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
132 {
133 	struct file *old_file;
134 	struct file *new_file;
135 	loff_t old_pos = 0;
136 	loff_t new_pos = 0;
137 	loff_t cloned;
138 	loff_t data_pos = -1;
139 	loff_t hole_len;
140 	bool skip_hole = false;
141 	int error = 0;
142 
143 	if (len == 0)
144 		return 0;
145 
146 	old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
147 	if (IS_ERR(old_file))
148 		return PTR_ERR(old_file);
149 
150 	new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
151 	if (IS_ERR(new_file)) {
152 		error = PTR_ERR(new_file);
153 		goto out_fput;
154 	}
155 
156 	/* Try to use clone_file_range to clone up within the same fs */
157 	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
158 	if (cloned == len)
159 		goto out;
160 	/* Couldn't clone, so now we try to copy the data */
161 
162 	/* Check if lower fs supports seek operation */
163 	if (old_file->f_mode & FMODE_LSEEK &&
164 	    old_file->f_op->llseek)
165 		skip_hole = true;
166 
167 	while (len) {
168 		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
169 		long bytes;
170 
171 		if (len < this_len)
172 			this_len = len;
173 
174 		if (signal_pending_state(TASK_KILLABLE, current)) {
175 			error = -EINTR;
176 			break;
177 		}
178 
179 		/*
180 		 * Fill zero for hole will cost unnecessary disk space
181 		 * and meanwhile slow down the copy-up speed, so we do
182 		 * an optimization for hole during copy-up, it relies
183 		 * on SEEK_DATA implementation in lower fs so if lower
184 		 * fs does not support it, copy-up will behave as before.
185 		 *
186 		 * Detail logic of hole detection as below:
187 		 * When we detect next data position is larger than current
188 		 * position we will skip that hole, otherwise we copy
189 		 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
190 		 * it may not recognize all kind of holes and sometimes
191 		 * only skips partial of hole area. However, it will be
192 		 * enough for most of the use cases.
193 		 */
194 
195 		if (skip_hole && data_pos < old_pos) {
196 			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
197 			if (data_pos > old_pos) {
198 				hole_len = data_pos - old_pos;
199 				len -= hole_len;
200 				old_pos = new_pos = data_pos;
201 				continue;
202 			} else if (data_pos == -ENXIO) {
203 				break;
204 			} else if (data_pos < 0) {
205 				skip_hole = false;
206 			}
207 		}
208 
209 		bytes = do_splice_direct(old_file, &old_pos,
210 					 new_file, &new_pos,
211 					 this_len, SPLICE_F_MOVE);
212 		if (bytes <= 0) {
213 			error = bytes;
214 			break;
215 		}
216 		WARN_ON(old_pos != new_pos);
217 
218 		len -= bytes;
219 	}
220 out:
221 	if (!error)
222 		error = vfs_fsync(new_file, 0);
223 	fput(new_file);
224 out_fput:
225 	fput(old_file);
226 	return error;
227 }
228 
229 static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
230 {
231 	struct iattr attr = {
232 		.ia_valid = ATTR_SIZE,
233 		.ia_size = stat->size,
234 	};
235 
236 	return notify_change(upperdentry, &attr, NULL);
237 }
238 
239 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
240 {
241 	struct iattr attr = {
242 		.ia_valid =
243 		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
244 		.ia_atime = stat->atime,
245 		.ia_mtime = stat->mtime,
246 	};
247 
248 	return notify_change(upperdentry, &attr, NULL);
249 }
250 
251 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
252 {
253 	int err = 0;
254 
255 	if (!S_ISLNK(stat->mode)) {
256 		struct iattr attr = {
257 			.ia_valid = ATTR_MODE,
258 			.ia_mode = stat->mode,
259 		};
260 		err = notify_change(upperdentry, &attr, NULL);
261 	}
262 	if (!err) {
263 		struct iattr attr = {
264 			.ia_valid = ATTR_UID | ATTR_GID,
265 			.ia_uid = stat->uid,
266 			.ia_gid = stat->gid,
267 		};
268 		err = notify_change(upperdentry, &attr, NULL);
269 	}
270 	if (!err)
271 		ovl_set_timestamps(upperdentry, stat);
272 
273 	return err;
274 }
275 
276 struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
277 {
278 	struct ovl_fh *fh;
279 	int fh_type, dwords;
280 	int buflen = MAX_HANDLE_SZ;
281 	uuid_t *uuid = &real->d_sb->s_uuid;
282 	int err;
283 
284 	/* Make sure the real fid stays 32bit aligned */
285 	BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
286 	BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
287 
288 	fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
289 	if (!fh)
290 		return ERR_PTR(-ENOMEM);
291 
292 	/*
293 	 * We encode a non-connectable file handle for non-dir, because we
294 	 * only need to find the lower inode number and we don't want to pay
295 	 * the price or reconnecting the dentry.
296 	 */
297 	dwords = buflen >> 2;
298 	fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
299 	buflen = (dwords << 2);
300 
301 	err = -EIO;
302 	if (WARN_ON(fh_type < 0) ||
303 	    WARN_ON(buflen > MAX_HANDLE_SZ) ||
304 	    WARN_ON(fh_type == FILEID_INVALID))
305 		goto out_err;
306 
307 	fh->fb.version = OVL_FH_VERSION;
308 	fh->fb.magic = OVL_FH_MAGIC;
309 	fh->fb.type = fh_type;
310 	fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
311 	/*
312 	 * When we will want to decode an overlay dentry from this handle
313 	 * and all layers are on the same fs, if we get a disconncted real
314 	 * dentry when we decode fid, the only way to tell if we should assign
315 	 * it to upperdentry or to lowerstack is by checking this flag.
316 	 */
317 	if (is_upper)
318 		fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
319 	fh->fb.len = sizeof(fh->fb) + buflen;
320 	fh->fb.uuid = *uuid;
321 
322 	return fh;
323 
324 out_err:
325 	kfree(fh);
326 	return ERR_PTR(err);
327 }
328 
329 int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
330 		   struct dentry *upper)
331 {
332 	const struct ovl_fh *fh = NULL;
333 	int err;
334 
335 	/*
336 	 * When lower layer doesn't support export operations store a 'null' fh,
337 	 * so we can use the overlay.origin xattr to distignuish between a copy
338 	 * up and a pure upper inode.
339 	 */
340 	if (ovl_can_decode_fh(lower->d_sb)) {
341 		fh = ovl_encode_real_fh(lower, false);
342 		if (IS_ERR(fh))
343 			return PTR_ERR(fh);
344 	}
345 
346 	/*
347 	 * Do not fail when upper doesn't support xattrs.
348 	 */
349 	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
350 				 fh ? fh->fb.len : 0, 0);
351 	kfree(fh);
352 
353 	return err;
354 }
355 
356 /* Store file handle of @upper dir in @index dir entry */
357 static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
358 {
359 	const struct ovl_fh *fh;
360 	int err;
361 
362 	fh = ovl_encode_real_fh(upper, true);
363 	if (IS_ERR(fh))
364 		return PTR_ERR(fh);
365 
366 	err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
367 
368 	kfree(fh);
369 	return err;
370 }
371 
372 /*
373  * Create and install index entry.
374  *
375  * Caller must hold i_mutex on indexdir.
376  */
377 static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
378 			    struct dentry *upper)
379 {
380 	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
381 	struct inode *dir = d_inode(indexdir);
382 	struct dentry *index = NULL;
383 	struct dentry *temp = NULL;
384 	struct qstr name = { };
385 	int err;
386 
387 	/*
388 	 * For now this is only used for creating index entry for directories,
389 	 * because non-dir are copied up directly to index and then hardlinked
390 	 * to upper dir.
391 	 *
392 	 * TODO: implement create index for non-dir, so we can call it when
393 	 * encoding file handle for non-dir in case index does not exist.
394 	 */
395 	if (WARN_ON(!d_is_dir(dentry)))
396 		return -EIO;
397 
398 	/* Directory not expected to be indexed before copy up */
399 	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
400 		return -EIO;
401 
402 	err = ovl_get_index_name(origin, &name);
403 	if (err)
404 		return err;
405 
406 	temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
407 	err = PTR_ERR(temp);
408 	if (IS_ERR(temp))
409 		goto free_name;
410 
411 	err = ovl_set_upper_fh(upper, temp);
412 	if (err)
413 		goto out;
414 
415 	index = lookup_one_len(name.name, indexdir, name.len);
416 	if (IS_ERR(index)) {
417 		err = PTR_ERR(index);
418 	} else {
419 		err = ovl_do_rename(dir, temp, dir, index, 0);
420 		dput(index);
421 	}
422 out:
423 	if (err)
424 		ovl_cleanup(dir, temp);
425 	dput(temp);
426 free_name:
427 	kfree(name.name);
428 	return err;
429 }
430 
/* State shared by the helpers that copy up one dentry. */
struct ovl_copy_up_ctx {
	/* Overlay parent of @dentry; NULL when copying up a disconnected dentry */
	struct dentry *parent;
	/* Overlay dentry being copied up */
	struct dentry *dentry;
	/* Lower path of @dentry, filled in by ovl_path_lower() */
	struct path lowerpath;
	/* Attributes of @lowerpath (size is zeroed for O_TRUNC) */
	struct kstat stat;
	/* atime/mtime of the parent's upper dir, restored after copy up */
	struct kstat pstat;
	/* Symlink target when the lower inode is a symlink */
	const char *link;
	/* Directory the copy is installed into: parent's upper dir or index dir */
	struct dentry *destdir;
	/* Name of the copy inside @destdir */
	struct qstr destname;
	/* Directory in which the temporary copy is prepared */
	struct dentry *workdir;
	/* Store the origin xattr on the copy */
	bool origin;
	/* ovl_need_index() was true for @dentry */
	bool indexed;
	/* Result of ovl_need_meta_copy_up(): skip the file data for now */
	bool metacopy;
};
445 
446 static int ovl_link_up(struct ovl_copy_up_ctx *c)
447 {
448 	int err;
449 	struct dentry *upper;
450 	struct dentry *upperdir = ovl_dentry_upper(c->parent);
451 	struct inode *udir = d_inode(upperdir);
452 
453 	/* Mark parent "impure" because it may now contain non-pure upper */
454 	err = ovl_set_impure(c->parent, upperdir);
455 	if (err)
456 		return err;
457 
458 	err = ovl_set_nlink_lower(c->dentry);
459 	if (err)
460 		return err;
461 
462 	inode_lock_nested(udir, I_MUTEX_PARENT);
463 	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
464 			       c->dentry->d_name.len);
465 	err = PTR_ERR(upper);
466 	if (!IS_ERR(upper)) {
467 		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
468 		dput(upper);
469 
470 		if (!err) {
471 			/* Restore timestamps on parent (best effort) */
472 			ovl_set_timestamps(upperdir, &c->pstat);
473 			ovl_dentry_set_upper_alias(c->dentry);
474 		}
475 	}
476 	inode_unlock(udir);
477 	if (err)
478 		return err;
479 
480 	err = ovl_set_nlink_upper(c->dentry);
481 
482 	return err;
483 }
484 
485 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
486 {
487 	int err;
488 
489 	/*
490 	 * Copy up data first and then xattrs. Writing data after
491 	 * xattrs will remove security.capability xattr automatically.
492 	 */
493 	if (S_ISREG(c->stat.mode) && !c->metacopy) {
494 		struct path upperpath, datapath;
495 
496 		ovl_path_upper(c->dentry, &upperpath);
497 		if (WARN_ON(upperpath.dentry != NULL))
498 			return -EIO;
499 		upperpath.dentry = temp;
500 
501 		ovl_path_lowerdata(c->dentry, &datapath);
502 		err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
503 		if (err)
504 			return err;
505 	}
506 
507 	err = ovl_copy_xattr(c->lowerpath.dentry, temp);
508 	if (err)
509 		return err;
510 
511 	/*
512 	 * Store identifier of lower inode in upper inode xattr to
513 	 * allow lookup of the copy up origin inode.
514 	 *
515 	 * Don't set origin when we are breaking the association with a lower
516 	 * hard link.
517 	 */
518 	if (c->origin) {
519 		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
520 		if (err)
521 			return err;
522 	}
523 
524 	if (c->metacopy) {
525 		err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
526 					 NULL, 0, -EOPNOTSUPP);
527 		if (err)
528 			return err;
529 	}
530 
531 	inode_lock(temp->d_inode);
532 	if (S_ISREG(c->stat.mode))
533 		err = ovl_set_size(temp, &c->stat);
534 	if (!err)
535 		err = ovl_set_attr(temp, &c->stat);
536 	inode_unlock(temp->d_inode);
537 
538 	return err;
539 }
540 
/* Credentials bookkeeping for ovl_prep_cu_creds()/ovl_revert_cu_creds(). */
struct ovl_cu_creds {
	/* Credentials returned by override_creds(); restored on revert */
	const struct cred *old;
	/* Credentials supplied by security_inode_copy_up(), or NULL */
	struct cred *new;
};
545 
546 static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
547 {
548 	int err;
549 
550 	cc->old = cc->new = NULL;
551 	err = security_inode_copy_up(dentry, &cc->new);
552 	if (err < 0)
553 		return err;
554 
555 	if (cc->new)
556 		cc->old = override_creds(cc->new);
557 
558 	return 0;
559 }
560 
561 static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
562 {
563 	if (cc->new) {
564 		revert_creds(cc->old);
565 		put_cred(cc->new);
566 	}
567 }
568 
569 /*
570  * Copyup using workdir to prepare temp file.  Used when copying up directories,
571  * special files or when upper fs doesn't support O_TMPFILE.
572  */
573 static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
574 {
575 	struct inode *inode;
576 	struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
577 	struct dentry *temp, *upper;
578 	struct ovl_cu_creds cc;
579 	int err;
580 	struct ovl_cattr cattr = {
581 		/* Can't properly set mode on creation because of the umask */
582 		.mode = c->stat.mode & S_IFMT,
583 		.rdev = c->stat.rdev,
584 		.link = c->link
585 	};
586 
587 	/* workdir and destdir could be the same when copying up to indexdir */
588 	err = -EIO;
589 	if (lock_rename(c->workdir, c->destdir) != NULL)
590 		goto unlock;
591 
592 	err = ovl_prep_cu_creds(c->dentry, &cc);
593 	if (err)
594 		goto unlock;
595 
596 	temp = ovl_create_temp(c->workdir, &cattr);
597 	ovl_revert_cu_creds(&cc);
598 
599 	err = PTR_ERR(temp);
600 	if (IS_ERR(temp))
601 		goto unlock;
602 
603 	err = ovl_copy_up_inode(c, temp);
604 	if (err)
605 		goto cleanup;
606 
607 	if (S_ISDIR(c->stat.mode) && c->indexed) {
608 		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
609 		if (err)
610 			goto cleanup;
611 	}
612 
613 	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
614 	err = PTR_ERR(upper);
615 	if (IS_ERR(upper))
616 		goto cleanup;
617 
618 	err = ovl_do_rename(wdir, temp, udir, upper, 0);
619 	dput(upper);
620 	if (err)
621 		goto cleanup;
622 
623 	if (!c->metacopy)
624 		ovl_set_upperdata(d_inode(c->dentry));
625 	inode = d_inode(c->dentry);
626 	ovl_inode_update(inode, temp);
627 	if (S_ISDIR(inode->i_mode))
628 		ovl_set_flag(OVL_WHITEOUTS, inode);
629 unlock:
630 	unlock_rename(c->workdir, c->destdir);
631 
632 	return err;
633 
634 cleanup:
635 	ovl_cleanup(wdir, temp);
636 	dput(temp);
637 	goto unlock;
638 }
639 
640 /* Copyup using O_TMPFILE which does not require cross dir locking */
641 static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
642 {
643 	struct inode *udir = d_inode(c->destdir);
644 	struct dentry *temp, *upper;
645 	struct ovl_cu_creds cc;
646 	int err;
647 
648 	err = ovl_prep_cu_creds(c->dentry, &cc);
649 	if (err)
650 		return err;
651 
652 	temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
653 	ovl_revert_cu_creds(&cc);
654 
655 	if (IS_ERR(temp))
656 		return PTR_ERR(temp);
657 
658 	err = ovl_copy_up_inode(c, temp);
659 	if (err)
660 		goto out_dput;
661 
662 	inode_lock_nested(udir, I_MUTEX_PARENT);
663 
664 	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
665 	err = PTR_ERR(upper);
666 	if (!IS_ERR(upper)) {
667 		err = ovl_do_link(temp, udir, upper);
668 		dput(upper);
669 	}
670 	inode_unlock(udir);
671 
672 	if (err)
673 		goto out_dput;
674 
675 	if (!c->metacopy)
676 		ovl_set_upperdata(d_inode(c->dentry));
677 	ovl_inode_update(d_inode(c->dentry), temp);
678 
679 	return 0;
680 
681 out_dput:
682 	dput(temp);
683 	return err;
684 }
685 
686 /*
687  * Copy up a single dentry
688  *
689  * All renames start with copy up of source if necessary.  The actual
690  * rename will only proceed once the copy up was successful.  Copy up uses
691  * upper parent i_mutex for exclusion.  Since rename can change d_parent it
692  * is possible that the copy up will lock the old parent.  At that point
693  * the file will have already been copied up anyway.
694  */
695 static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
696 {
697 	int err;
698 	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
699 	bool to_index = false;
700 
701 	/*
702 	 * Indexed non-dir is copied up directly to the index entry and then
703 	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
704 	 * then index entry is created and then copied up dir installed.
705 	 * Copying dir up to indexdir instead of workdir simplifies locking.
706 	 */
707 	if (ovl_need_index(c->dentry)) {
708 		c->indexed = true;
709 		if (S_ISDIR(c->stat.mode))
710 			c->workdir = ovl_indexdir(c->dentry->d_sb);
711 		else
712 			to_index = true;
713 	}
714 
715 	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
716 		c->origin = true;
717 
718 	if (to_index) {
719 		c->destdir = ovl_indexdir(c->dentry->d_sb);
720 		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
721 		if (err)
722 			return err;
723 	} else if (WARN_ON(!c->parent)) {
724 		/* Disconnected dentry must be copied up to index dir */
725 		return -EIO;
726 	} else {
727 		/*
728 		 * Mark parent "impure" because it may now contain non-pure
729 		 * upper
730 		 */
731 		err = ovl_set_impure(c->parent, c->destdir);
732 		if (err)
733 			return err;
734 	}
735 
736 	/* Should we copyup with O_TMPFILE or with workdir? */
737 	if (S_ISREG(c->stat.mode) && ofs->tmpfile)
738 		err = ovl_copy_up_tmpfile(c);
739 	else
740 		err = ovl_copy_up_workdir(c);
741 	if (err)
742 		goto out;
743 
744 	if (c->indexed)
745 		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
746 
747 	if (to_index) {
748 		/* Initialize nlink for copy up of disconnected dentry */
749 		err = ovl_set_nlink_upper(c->dentry);
750 	} else {
751 		struct inode *udir = d_inode(c->destdir);
752 
753 		/* Restore timestamps on parent (best effort) */
754 		inode_lock(udir);
755 		ovl_set_timestamps(c->destdir, &c->pstat);
756 		inode_unlock(udir);
757 
758 		ovl_dentry_set_upper_alias(c->dentry);
759 	}
760 
761 out:
762 	if (to_index)
763 		kfree(c->destname.name);
764 	return err;
765 }
766 
767 static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
768 				  int flags)
769 {
770 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
771 
772 	if (!ofs->config.metacopy)
773 		return false;
774 
775 	if (!S_ISREG(mode))
776 		return false;
777 
778 	if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
779 		return false;
780 
781 	return true;
782 }
783 
784 /* Copy up data of an inode which was copied up metadata only in the past. */
785 static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
786 {
787 	struct path upperpath, datapath;
788 	int err;
789 	char *capability = NULL;
790 	ssize_t cap_size;
791 
792 	ovl_path_upper(c->dentry, &upperpath);
793 	if (WARN_ON(upperpath.dentry == NULL))
794 		return -EIO;
795 
796 	ovl_path_lowerdata(c->dentry, &datapath);
797 	if (WARN_ON(datapath.dentry == NULL))
798 		return -EIO;
799 
800 	if (c->stat.size) {
801 		err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
802 					      &capability, 0);
803 		if (err < 0 && err != -ENODATA)
804 			goto out;
805 	}
806 
807 	err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
808 	if (err)
809 		goto out_free;
810 
811 	/*
812 	 * Writing to upper file will clear security.capability xattr. We
813 	 * don't want that to happen for normal copy-up operation.
814 	 */
815 	if (capability) {
816 		err = ovl_do_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
817 				      capability, cap_size, 0);
818 		if (err)
819 			goto out_free;
820 	}
821 
822 
823 	err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
824 	if (err)
825 		goto out_free;
826 
827 	ovl_set_upperdata(d_inode(c->dentry));
828 out_free:
829 	kfree(capability);
830 out:
831 	return err;
832 }
833 
834 static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
835 			   int flags)
836 {
837 	int err;
838 	DEFINE_DELAYED_CALL(done);
839 	struct path parentpath;
840 	struct ovl_copy_up_ctx ctx = {
841 		.parent = parent,
842 		.dentry = dentry,
843 		.workdir = ovl_workdir(dentry),
844 	};
845 
846 	if (WARN_ON(!ctx.workdir))
847 		return -EROFS;
848 
849 	ovl_path_lower(dentry, &ctx.lowerpath);
850 	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
851 			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
852 	if (err)
853 		return err;
854 
855 	ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
856 
857 	if (parent) {
858 		ovl_path_upper(parent, &parentpath);
859 		ctx.destdir = parentpath.dentry;
860 		ctx.destname = dentry->d_name;
861 
862 		err = vfs_getattr(&parentpath, &ctx.pstat,
863 				  STATX_ATIME | STATX_MTIME,
864 				  AT_STATX_SYNC_AS_STAT);
865 		if (err)
866 			return err;
867 	}
868 
869 	/* maybe truncate regular file. this has no effect on dirs */
870 	if (flags & O_TRUNC)
871 		ctx.stat.size = 0;
872 
873 	if (S_ISLNK(ctx.stat.mode)) {
874 		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
875 		if (IS_ERR(ctx.link))
876 			return PTR_ERR(ctx.link);
877 	}
878 
879 	err = ovl_copy_up_start(dentry, flags);
880 	/* err < 0: interrupted, err > 0: raced with another copy-up */
881 	if (unlikely(err)) {
882 		if (err > 0)
883 			err = 0;
884 	} else {
885 		if (!ovl_dentry_upper(dentry))
886 			err = ovl_do_copy_up(&ctx);
887 		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
888 			err = ovl_link_up(&ctx);
889 		if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
890 			err = ovl_copy_up_meta_inode_data(&ctx);
891 		ovl_copy_up_end(dentry);
892 	}
893 	do_delayed_call(&done);
894 
895 	return err;
896 }
897 
898 static int ovl_copy_up_flags(struct dentry *dentry, int flags)
899 {
900 	int err = 0;
901 	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
902 	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
903 
904 	/*
905 	 * With NFS export, copy up can get called for a disconnected non-dir.
906 	 * In this case, we will copy up lower inode to index dir without
907 	 * linking it to upper dir.
908 	 */
909 	if (WARN_ON(disconnected && d_is_dir(dentry)))
910 		return -EIO;
911 
912 	while (!err) {
913 		struct dentry *next;
914 		struct dentry *parent = NULL;
915 
916 		if (ovl_already_copied_up(dentry, flags))
917 			break;
918 
919 		next = dget(dentry);
920 		/* find the topmost dentry not yet copied up */
921 		for (; !disconnected;) {
922 			parent = dget_parent(next);
923 
924 			if (ovl_dentry_upper(parent))
925 				break;
926 
927 			dput(next);
928 			next = parent;
929 		}
930 
931 		err = ovl_copy_up_one(parent, next, flags);
932 
933 		dput(parent);
934 		dput(next);
935 	}
936 	revert_creds(old_cred);
937 
938 	return err;
939 }
940 
941 static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
942 {
943 	/* Copy up of disconnected dentry does not set upper alias */
944 	if (ovl_already_copied_up(dentry, flags))
945 		return false;
946 
947 	if (special_file(d_inode(dentry)->i_mode))
948 		return false;
949 
950 	if (!ovl_open_flags_need_copy_up(flags))
951 		return false;
952 
953 	return true;
954 }
955 
/*
 * Copy up @dentry if opening it with @flags requires that.
 *
 * Write access is taken for the duration of the copy up and dropped again
 * afterwards.
 *
 * Returns 0 when no copy up was needed or it succeeded, otherwise a
 * negative errno from ovl_want_write() or the copy up.
 */
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
	int err;

	if (!ovl_open_need_copy_up(dentry, flags))
		return 0;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up_flags(dentry, flags);
	ovl_drop_write(dentry);

	return err;
}
970 
971 int ovl_copy_up_with_data(struct dentry *dentry)
972 {
973 	return ovl_copy_up_flags(dentry, O_WRONLY);
974 }
975 
/*
 * Copy up @dentry with no open flags.
 * Returns the result of ovl_copy_up_flags().
 */
int ovl_copy_up(struct dentry *dentry)
{
	const int flags = 0;

	return ovl_copy_up_flags(dentry, flags);
}
980