xref: /linux/fs/afs/fs_probe.c (revision 06ed6aa56ffac9241e03a24649e8d048f8f1b10c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS fileserver probing
3  *
4  * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13 
14 static bool afs_fs_probe_done(struct afs_server *server)
15 {
16 	if (!atomic_dec_and_test(&server->probe_outstanding))
17 		return false;
18 
19 	wake_up_var(&server->probe_outstanding);
20 	clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
21 	wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
22 	return true;
23 }
24 
25 /*
26  * Process the result of probing a fileserver.  This is called after successful
27  * or failed delivery of an FS.GetCapabilities operation.
28  */
29 void afs_fileserver_probe_result(struct afs_call *call)
30 {
31 	struct afs_addr_list *alist = call->alist;
32 	struct afs_server *server = call->server;
33 	unsigned int server_index = call->server_index;
34 	unsigned int index = call->addr_ix;
35 	unsigned int rtt = UINT_MAX;
36 	bool have_result = false;
37 	u64 _rtt;
38 	int ret = call->error;
39 
40 	_enter("%pU,%u", &server->uuid, index);
41 
42 	spin_lock(&server->probe_lock);
43 
44 	switch (ret) {
45 	case 0:
46 		server->probe.error = 0;
47 		goto responded;
48 	case -ECONNABORTED:
49 		if (!server->probe.responded) {
50 			server->probe.abort_code = call->abort_code;
51 			server->probe.error = ret;
52 		}
53 		goto responded;
54 	case -ENOMEM:
55 	case -ENONET:
56 		server->probe.local_failure = true;
57 		afs_io_error(call, afs_io_error_fs_probe_fail);
58 		goto out;
59 	case -ECONNRESET: /* Responded, but call expired. */
60 	case -ERFKILL:
61 	case -EADDRNOTAVAIL:
62 	case -ENETUNREACH:
63 	case -EHOSTUNREACH:
64 	case -EHOSTDOWN:
65 	case -ECONNREFUSED:
66 	case -ETIMEDOUT:
67 	case -ETIME:
68 	default:
69 		clear_bit(index, &alist->responded);
70 		set_bit(index, &alist->failed);
71 		if (!server->probe.responded &&
72 		    (server->probe.error == 0 ||
73 		     server->probe.error == -ETIMEDOUT ||
74 		     server->probe.error == -ETIME))
75 			server->probe.error = ret;
76 		afs_io_error(call, afs_io_error_fs_probe_fail);
77 		goto out;
78 	}
79 
80 responded:
81 	set_bit(index, &alist->responded);
82 	clear_bit(index, &alist->failed);
83 
84 	if (call->service_id == YFS_FS_SERVICE) {
85 		server->probe.is_yfs = true;
86 		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
87 		alist->addrs[index].srx_service = call->service_id;
88 	} else {
89 		server->probe.not_yfs = true;
90 		if (!server->probe.is_yfs) {
91 			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
92 			alist->addrs[index].srx_service = call->service_id;
93 		}
94 	}
95 
96 	/* Get the RTT and scale it to fit into a 32-bit value that represents
97 	 * over a minute of time so that we can access it with one instruction
98 	 * on a 32-bit system.
99 	 */
100 	_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
101 	_rtt /= 64;
102 	rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
103 	if (rtt < server->probe.rtt) {
104 		server->probe.rtt = rtt;
105 		alist->preferred = index;
106 		have_result = true;
107 	}
108 
109 	smp_wmb(); /* Set rtt before responded. */
110 	server->probe.responded = true;
111 	set_bit(AFS_SERVER_FL_PROBED, &server->flags);
112 out:
113 	spin_unlock(&server->probe_lock);
114 
115 	_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
116 	       server_index, index, &alist->addrs[index].transport,
117 	       (unsigned int)rtt, ret);
118 
119 	have_result |= afs_fs_probe_done(server);
120 	if (have_result)
121 		wake_up_all(&server->probe_wq);
122 }
123 
124 /*
125  * Probe all of a fileserver's addresses to find out the best route and to
126  * query its capabilities.
127  */
128 static int afs_do_probe_fileserver(struct afs_net *net,
129 				   struct afs_server *server,
130 				   struct key *key,
131 				   unsigned int server_index,
132 				   struct afs_error *_e)
133 {
134 	struct afs_addr_cursor ac = {
135 		.index = 0,
136 	};
137 	struct afs_call *call;
138 	bool in_progress = false;
139 
140 	_enter("%pU", &server->uuid);
141 
142 	read_lock(&server->fs_lock);
143 	ac.alist = rcu_dereference_protected(server->addresses,
144 					     lockdep_is_held(&server->fs_lock));
145 	afs_get_addrlist(ac.alist);
146 	read_unlock(&server->fs_lock);
147 
148 	atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
149 	memset(&server->probe, 0, sizeof(server->probe));
150 	server->probe.rtt = UINT_MAX;
151 
152 	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
153 		call = afs_fs_get_capabilities(net, server, &ac, key, server_index);
154 		if (!IS_ERR(call)) {
155 			afs_put_call(call);
156 			in_progress = true;
157 		} else {
158 			afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
159 		}
160 	}
161 
162 	if (!in_progress)
163 		afs_fs_probe_done(server);
164 	afs_put_addrlist(ac.alist);
165 	return in_progress;
166 }
167 
168 /*
169  * Send off probes to all unprobed servers.
170  */
171 int afs_probe_fileservers(struct afs_net *net, struct key *key,
172 			  struct afs_server_list *list)
173 {
174 	struct afs_server *server;
175 	struct afs_error e;
176 	bool in_progress = false;
177 	int i;
178 
179 	e.error = 0;
180 	e.responded = false;
181 	for (i = 0; i < list->nr_servers; i++) {
182 		server = list->servers[i].server;
183 		if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
184 			continue;
185 
186 		if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
187 		    afs_do_probe_fileserver(net, server, key, i, &e))
188 			in_progress = true;
189 	}
190 
191 	return in_progress ? 0 : e.error;
192 }
193 
194 /*
195  * Wait for the first as-yet untried fileserver to respond.
196  */
197 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
198 {
199 	struct wait_queue_entry *waits;
200 	struct afs_server *server;
201 	unsigned int rtt = UINT_MAX;
202 	bool have_responders = false;
203 	int pref = -1, i;
204 
205 	_enter("%u,%lx", slist->nr_servers, untried);
206 
207 	/* Only wait for servers that have a probe outstanding. */
208 	for (i = 0; i < slist->nr_servers; i++) {
209 		if (test_bit(i, &untried)) {
210 			server = slist->servers[i].server;
211 			if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
212 				__clear_bit(i, &untried);
213 			if (server->probe.responded)
214 				have_responders = true;
215 		}
216 	}
217 	if (have_responders || !untried)
218 		return 0;
219 
220 	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
221 	if (!waits)
222 		return -ENOMEM;
223 
224 	for (i = 0; i < slist->nr_servers; i++) {
225 		if (test_bit(i, &untried)) {
226 			server = slist->servers[i].server;
227 			init_waitqueue_entry(&waits[i], current);
228 			add_wait_queue(&server->probe_wq, &waits[i]);
229 		}
230 	}
231 
232 	for (;;) {
233 		bool still_probing = false;
234 
235 		set_current_state(TASK_INTERRUPTIBLE);
236 		for (i = 0; i < slist->nr_servers; i++) {
237 			if (test_bit(i, &untried)) {
238 				server = slist->servers[i].server;
239 				if (server->probe.responded)
240 					goto stop;
241 				if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
242 					still_probing = true;
243 			}
244 		}
245 
246 		if (!still_probing || signal_pending(current))
247 			goto stop;
248 		schedule();
249 	}
250 
251 stop:
252 	set_current_state(TASK_RUNNING);
253 
254 	for (i = 0; i < slist->nr_servers; i++) {
255 		if (test_bit(i, &untried)) {
256 			server = slist->servers[i].server;
257 			if (server->probe.responded &&
258 			    server->probe.rtt < rtt) {
259 				pref = i;
260 				rtt = server->probe.rtt;
261 			}
262 
263 			remove_wait_queue(&server->probe_wq, &waits[i]);
264 		}
265 	}
266 
267 	kfree(waits);
268 
269 	if (pref == -1 && signal_pending(current))
270 		return -ERESTARTSYS;
271 
272 	if (pref >= 0)
273 		slist->preferred = pref;
274 	return 0;
275 }
276