1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/file.h> 4 #include <linux/namei.h> 5 6 #include "super.h" 7 #include "mds_client.h" 8 #include <linux/ceph/pagelist.h> 9 10 /** 11 * Implement fcntl and flock locking functions. 12 */ 13 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 14 u64 pid, u64 pid_ns, 15 int cmd, u64 start, u64 length, u8 wait) 16 { 17 struct inode *inode = file->f_dentry->d_inode; 18 struct ceph_mds_client *mdsc = 19 ceph_sb_to_client(inode->i_sb)->mdsc; 20 struct ceph_mds_request *req; 21 int err; 22 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 24 if (IS_ERR(req)) 25 return PTR_ERR(req); 26 req->r_inode = igrab(inode); 27 28 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 29 "length: %llu, wait: %d, type`: %d", (int)lock_type, 30 (int)operation, pid, start, length, wait, cmd); 31 32 req->r_args.filelock_change.rule = lock_type; 33 req->r_args.filelock_change.type = cmd; 34 req->r_args.filelock_change.pid = cpu_to_le64(pid); 35 /* This should be adjusted, but I'm not sure if 36 namespaces actually get id numbers*/ 37 req->r_args.filelock_change.pid_namespace = 38 cpu_to_le64((u64)pid_ns); 39 req->r_args.filelock_change.start = cpu_to_le64(start); 40 req->r_args.filelock_change.length = cpu_to_le64(length); 41 req->r_args.filelock_change.wait = wait; 42 43 err = ceph_mdsc_do_request(mdsc, inode, req); 44 ceph_mdsc_put_request(req); 45 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 46 "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, 47 (int)operation, pid, start, length, wait, cmd, err); 48 return err; 49 } 50 51 /** 52 * Attempt to set an fcntl lock. 53 * For now, this just goes away to the server. Later it may be more awesome. 54 */ 55 int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 56 { 57 u64 length; 58 u8 lock_cmd; 59 int err; 60 u8 wait = 0; 61 u16 op = CEPH_MDS_OP_SETFILELOCK; 62 63 fl->fl_nspid = get_pid(task_tgid(current)); 64 dout("ceph_lock, fl_pid:%d", fl->fl_pid); 65 66 /* set wait bit as appropriate, then make command as Ceph expects it*/ 67 if (F_SETLKW == cmd) 68 wait = 1; 69 if (F_GETLK == cmd) 70 op = CEPH_MDS_OP_GETFILELOCK; 71 72 if (F_RDLCK == fl->fl_type) 73 lock_cmd = CEPH_LOCK_SHARED; 74 else if (F_WRLCK == fl->fl_type) 75 lock_cmd = CEPH_LOCK_EXCL; 76 else 77 lock_cmd = CEPH_LOCK_UNLOCK; 78 79 if (LLONG_MAX == fl->fl_end) 80 length = 0; 81 else 82 length = fl->fl_end - fl->fl_start + 1; 83 84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 85 (u64)fl->fl_pid, 86 (u64)(unsigned long)fl->fl_nspid, 87 lock_cmd, fl->fl_start, 88 length, wait); 89 if (!err) { 90 dout("mds locked, locking locally"); 91 err = posix_lock_file(file, fl, NULL); 92 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 93 /* undo! This should only happen if the kernel detects 94 * local deadlock. */ 95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 96 (u64)fl->fl_pid, 97 (u64)(unsigned long)fl->fl_nspid, 98 CEPH_LOCK_UNLOCK, fl->fl_start, 99 length, 0); 100 dout("got %d on posix_lock_file, undid lock", err); 101 } 102 } else { 103 dout("mds returned error code %d", err); 104 } 105 return err; 106 } 107 108 int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 109 { 110 u64 length; 111 u8 lock_cmd; 112 int err; 113 u8 wait = 1; 114 115 fl->fl_nspid = get_pid(task_tgid(current)); 116 dout("ceph_flock, fl_pid:%d", fl->fl_pid); 117 118 /* set wait bit, then clear it out of cmd*/ 119 if (cmd & LOCK_NB) 120 wait = 0; 121 cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); 122 /* set command sequence that Ceph wants to see: 123 shared lock, exclusive lock, or unlock */ 124 if (LOCK_SH == cmd) 125 lock_cmd = CEPH_LOCK_SHARED; 126 else if (LOCK_EX == cmd) 127 lock_cmd = CEPH_LOCK_EXCL; 128 else 129 lock_cmd = CEPH_LOCK_UNLOCK; 130 /* mds requires start and length rather than start and end */ 131 if (LLONG_MAX == fl->fl_end) 132 length = 0; 133 else 134 length = fl->fl_end - fl->fl_start + 1; 135 136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 137 file, (u64)fl->fl_pid, 138 (u64)(unsigned long)fl->fl_nspid, 139 lock_cmd, fl->fl_start, 140 length, wait); 141 if (!err) { 142 err = flock_lock_file_wait(file, fl); 143 if (err) { 144 ceph_lock_message(CEPH_LOCK_FLOCK, 145 CEPH_MDS_OP_SETFILELOCK, 146 file, (u64)fl->fl_pid, 147 (u64)(unsigned long)fl->fl_nspid, 148 CEPH_LOCK_UNLOCK, fl->fl_start, 149 length, 0); 150 dout("got %d on flock_lock_file_wait, undid lock", err); 151 } 152 } else { 153 dout("mds error code %d", err); 154 } 155 return err; 156 } 157 158 /** 159 * Must be called with BKL already held. Fills in the passed 160 * counter variables, so you can prepare pagelist metadata before calling 161 * ceph_encode_locks. 162 */ 163 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 164 { 165 struct file_lock *lock; 166 167 *fcntl_count = 0; 168 *flock_count = 0; 169 170 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 171 if (lock->fl_flags & FL_POSIX) 172 ++(*fcntl_count); 173 else if (lock->fl_flags & FL_FLOCK) 174 ++(*flock_count); 175 } 176 dout("counted %d flock locks and %d fcntl locks", 177 *flock_count, *fcntl_count); 178 } 179 180 /** 181 * Encode the flock and fcntl locks for the given inode into the pagelist. 182 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 183 * sequential flock locks. 184 * Must be called with lock_flocks() already held. 185 * If we encounter more of a specific lock type than expected, 186 * we return the value 1. 187 */ 188 int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, 189 int num_fcntl_locks, int num_flock_locks) 190 { 191 struct file_lock *lock; 192 struct ceph_filelock cephlock; 193 int err = 0; 194 int seen_fcntl = 0; 195 int seen_flock = 0; 196 197 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 198 num_fcntl_locks); 199 err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); 200 if (err) 201 goto fail; 202 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 203 if (lock->fl_flags & FL_POSIX) { 204 ++seen_fcntl; 205 if (seen_fcntl > num_fcntl_locks) { 206 err = -ENOSPC; 207 goto fail; 208 } 209 err = lock_to_ceph_filelock(lock, &cephlock); 210 if (err) 211 goto fail; 212 err = ceph_pagelist_append(pagelist, &cephlock, 213 sizeof(struct ceph_filelock)); 214 } 215 if (err) 216 goto fail; 217 } 218 219 err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); 220 if (err) 221 goto fail; 222 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 223 if (lock->fl_flags & FL_FLOCK) { 224 ++seen_flock; 225 if (seen_flock > num_flock_locks) { 226 err = -ENOSPC; 227 goto fail; 228 } 229 err = lock_to_ceph_filelock(lock, &cephlock); 230 if (err) 231 goto fail; 232 err = ceph_pagelist_append(pagelist, &cephlock, 233 sizeof(struct ceph_filelock)); 234 } 235 if (err) 236 goto fail; 237 } 238 fail: 239 return err; 240 } 241 242 /* 243 * Given a pointer to a lock, convert it to a ceph filelock 244 */ 245 int lock_to_ceph_filelock(struct file_lock *lock, 246 struct ceph_filelock *cephlock) 247 { 248 int err = 0; 249 250 cephlock->start = cpu_to_le64(lock->fl_start); 251 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 252 cephlock->client = cpu_to_le64(0); 253 cephlock->pid = cpu_to_le64(lock->fl_pid); 254 cephlock->pid_namespace = 255 cpu_to_le64((u64)(unsigned long)lock->fl_nspid); 256 257 switch (lock->fl_type) { 258 case F_RDLCK: 259 cephlock->type = CEPH_LOCK_SHARED; 260 break; 261 case F_WRLCK: 262 cephlock->type = CEPH_LOCK_EXCL; 263 break; 264 case F_UNLCK: 265 cephlock->type = CEPH_LOCK_UNLOCK; 266 break; 267 default: 268 dout("Have unknown lock type %d", lock->fl_type); 269 err = -EINVAL; 270 } 271 272 return err; 273 } 274