tcmu / e806efff-9c46-4ca6-acae-716029060d20/main

* QA upload.
* Add fix-use-after-free.patch. Closes: #1013050
* d/p/series: Remove mention of non-existing patches.

Authored by Håvard F. Aasen, committed by Debian Janitor.

11 changed file(s) with 4190 addition(s) and 4 deletion(s).

debian/patches
0 /*
1 * Copyright (c) 2017 Red Hat, Inc.
2 *
3 * This file is licensed to you under your choice of the GNU Lesser
4 * General Public License, version 2.1 or any later version (LGPLv2.1 or
5 * later), or the Apache License 2.0.
6 */
7
8 #define _GNU_SOURCE
9 #include <scsi/scsi.h>
10 #include <errno.h>
11 #include <inttypes.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15
16 #include "ccan/list/list.h"
17
18 #include "darray.h"
19 #include "libtcmu.h"
20 #include "libtcmu_log.h"
21 #include "libtcmu_priv.h"
22 #include "libtcmu_common.h"
23 #include "tcmur_aio.h"
24 #include "tcmur_device.h"
25 #include "tcmu-runner.h"
26 #include "tcmu_runner_priv.h"
27 #include "tcmur_cmd_handler.h"
28 #include "alua.h"
29
30 static void _cleanup_spin_lock(void *arg)
31 {
32 pthread_spin_unlock(arg);
33 }
34
35 void tcmur_tcmulib_cmd_complete(struct tcmu_device *dev,
36 struct tcmulib_cmd *cmd, int rc)
37 {
38 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
39 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
40 struct timespec curr_time;
41
42 pthread_cleanup_push(_cleanup_spin_lock, (void *)&rdev->lock);
43 pthread_spin_lock(&rdev->lock);
44
45 if (tcmur_cmd->timed_out) {
46 if (tcmur_get_time(dev, &curr_time)) {
47 tcmu_dev_info(dev, "Timed out command id %hu completed with status %d.\n",
48 cmd->cmd_id, rc);
49 } else {
50 tcmu_dev_info(dev, "Timed out command id %hu completed after %f seconds with status %d.\n",
51 cmd->cmd_id,
52 difftime(curr_time.tv_sec,
53 tcmur_cmd->start_time.tv_sec),
54 rc);
55 }
56 }
57
58 list_del(&tcmur_cmd->cmds_list_entry);
59
60 tcmulib_command_complete(dev, cmd, rc);
61
62 pthread_spin_unlock(&rdev->lock);
63 pthread_cleanup_pop(0);
64 }
65
66 static void aio_command_finish(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
67 int rc)
68 {
69 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
70 int wake_up;
71
72 tcmur_tcmulib_cmd_complete(dev, cmd, rc);
73 track_aio_request_finish(rdev, &wake_up);
74 while (wake_up) {
75 tcmulib_processing_complete(dev);
76 track_aio_wakeup_finish(rdev, &wake_up);
77 }
78 }
79
80 void tcmur_cmd_complete(struct tcmu_device *dev, void *data, int rc)
81 {
82 struct tcmur_cmd *tcmur_cmd = data;
83
84 tcmur_cmd->done(dev, tcmur_cmd, rc);
85 }
86
87 static void tcmur_cmd_iovec_reset(struct tcmur_cmd *tcmur_cmd,
88 size_t data_length)
89 {
90 tcmur_cmd->iovec->iov_base = tcmur_cmd->iov_base_copy;
91 tcmur_cmd->iovec->iov_len = data_length;
92 }
93
94 static void tcmur_cmd_state_free(struct tcmur_cmd *tcmur_cmd)
95 {
96 free(tcmur_cmd->cmd_state);
97 }
98
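/*
 * Note: tcmur_cmd_state_init() below packs everything into one calloc:
 * the handler-specific state, then (when data_length is non-zero) a
 * single struct iovec, then the data buffer itself. The original
 * iov_base is saved in iov_base_copy so tcmur_cmd_iovec_reset() can
 * rewind the iovec after a handler has consumed it.
 */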
99 static int tcmur_cmd_state_init(struct tcmur_cmd *tcmur_cmd, int state_length,
100 size_t data_length)
101 {
102 void *state;
103 int iov_length = 0;
104
105 if (data_length)
106 iov_length = data_length + sizeof(struct iovec);
107
108 state = calloc(1, state_length + iov_length);
109 if (!state)
110 return -ENOMEM;
111
112 tcmur_cmd->cmd_state = state;
113 tcmur_cmd->requested = data_length;
114
115 if (data_length) {
116 struct iovec *iov = state + state_length;
117
118 iov->iov_base = iov + 1;
119 iov->iov_len = data_length;
120
121 tcmur_cmd->iov_base_copy = iov->iov_base;
122 tcmur_cmd->iov_cnt = 1;
123 tcmur_cmd->iovec = iov;
124 }
125
126 return 0;
127 }
128
129 static inline int check_iovec_length(struct tcmu_device *dev,
130 struct tcmulib_cmd *cmd, uint32_t sectors)
131 {
132 size_t iov_length = tcmu_iovec_length(cmd->iovec, cmd->iov_cnt);
133
134 if (iov_length != tcmu_lba_to_byte(dev, sectors)) {
135 tcmu_dev_err(dev, "iov len mismatch: iov len %zu, xfer len %u, block size %u\n",
136 iov_length, sectors, tcmu_dev_get_block_size(dev));
137 return TCMU_STS_HW_ERR;
138 }
139 return TCMU_STS_OK;
140 }
141
142 static inline int check_lbas(struct tcmu_device *dev,
143 uint64_t start_lba, uint64_t lba_cnt)
144 {
145 uint64_t dev_last_lba = tcmu_dev_get_num_lbas(dev);
146
147 if (start_lba + lba_cnt > dev_last_lba || start_lba + lba_cnt < start_lba) {
148 tcmu_dev_err(dev, "cmd exceeds last lba %"PRIu64" (lba %"PRIu64", xfer len %"PRIu64")\n",
149 dev_last_lba, start_lba, lba_cnt);
150 return TCMU_STS_RANGE;
151 }
152
153 return TCMU_STS_OK;
154 }
155
156 static int check_lba_and_length(struct tcmu_device *dev,
157 struct tcmulib_cmd *cmd, uint32_t sectors)
158 {
159 uint8_t *cdb = cmd->cdb;
160 uint64_t start_lba = tcmu_cdb_get_lba(cdb);
161 int ret;
162
163 ret = check_iovec_length(dev, cmd, sectors);
164 if (ret)
165 return ret;
166
167 ret = check_lbas(dev, start_lba, sectors);
168 if (ret)
169 return ret;
170
171 return TCMU_STS_OK;
172 }
173
174 static void handle_generic_cbk(struct tcmu_device *dev,
175 struct tcmur_cmd *tcmur_cmd, int ret)
176 {
177 aio_command_finish(dev, tcmur_cmd->lib_cmd, ret);
178 }
179
180 static int read_work_fn(struct tcmu_device *dev, void *data)
181 {
182 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
183 struct tcmur_cmd *tcmur_cmd = data;
184 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
185
186 return rhandler->read(dev, tcmur_cmd, cmd->iovec, cmd->iov_cnt,
187 tcmu_iovec_length(cmd->iovec, cmd->iov_cnt),
188 tcmu_cdb_to_byte(dev, cmd->cdb));
189 }
190
191 static int write_work_fn(struct tcmu_device *dev, void *data)
192 {
193 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
194 struct tcmur_cmd *tcmur_cmd = data;
195 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
196
197 return rhandler->write(dev, tcmur_cmd, cmd->iovec, cmd->iov_cnt,
198 tcmu_iovec_length(cmd->iovec, cmd->iov_cnt),
199 tcmu_cdb_to_byte(dev, cmd->cdb));
200 }
201
202 struct unmap_state {
203 pthread_mutex_t lock;
204 unsigned int refcount;
205 int status;
206 };
207
208 struct unmap_descriptor {
209 uint64_t offset;
210 uint64_t length;
211 };
212
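/*
 * Note on the unmap_state refcount: unmap_init() takes the initial
 * reference on behalf of the submitter, align_and_split_unmap() takes
 * one more reference per scheduled sub-unmap, and unmap_put() drops a
 * reference on each sub-unmap completion and on the final put from the
 * submitter, finishing the SCSI command and freeing the state once the
 * count reaches zero.
 */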
213 static int unmap_init(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
214 {
215 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
216 struct unmap_state *state;
217 int ret;
218
219 if (tcmur_cmd_state_init(tcmur_cmd, sizeof(*state), 0))
220 return TCMU_STS_NO_RESOURCE;
221 state = tcmur_cmd->cmd_state;
222
223 ret = pthread_mutex_init(&state->lock, NULL);
224 if (ret) {
225 tcmu_dev_err(dev, "Failed to init mutex in state!\n");
226 ret = TCMU_STS_HW_ERR;
227 goto out_free_state;
228 }
229
230 /* released by allocator when done submitting unmaps */
231 state->refcount = 1;
232 state->status = TCMU_STS_OK;
233 return TCMU_STS_OK;
234
235 out_free_state:
236 tcmur_cmd_state_free(tcmur_cmd);
237 return ret;
238 }
239
240 static void unmap_put(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
241 int ret)
242 {
243 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
244 struct unmap_state *state = tcmur_cmd->cmd_state;
245 int status;
246
247 pthread_mutex_lock(&state->lock);
248
249 if (state->status == TCMU_STS_OK && ret)
250 state->status = ret;
251
252 if (--state->refcount > 0) {
253 pthread_mutex_unlock(&state->lock);
254 return;
255 }
256 status = state->status;
257 pthread_mutex_unlock(&state->lock);
258
259 pthread_mutex_destroy(&state->lock);
260 tcmur_cmd_state_free(tcmur_cmd);
261
262 aio_command_finish(dev, cmd, status);
263 }
264
265 static void handle_unmap_cbk(struct tcmu_device *dev,
266 struct tcmur_cmd *tcmur_ucmd, int ret)
267 {
268 struct unmap_descriptor *desc = tcmur_ucmd->cmd_state;
269 struct tcmulib_cmd *cmd = tcmur_ucmd->lib_cmd;
270
271 free(desc);
272 free(tcmur_ucmd);
273
274 unmap_put(dev, cmd, ret);
275 }
276
277 static int unmap_work_fn(struct tcmu_device *dev, void *data)
278 {
279 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
280 struct tcmur_cmd *tcmur_ucmd = data;
281 struct unmap_descriptor *desc = tcmur_ucmd->cmd_state;
282 uint64_t offset = desc->offset, length = desc->length;
283
284 return rhandler->unmap(dev, tcmur_ucmd, offset, length);
285 }
286
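/*
 * Note: align_and_split_unmap() sizes the first chunk so that the
 * following chunks start on an UNMAP GRANULARITY ALIGNMENT boundary,
 * or splits only by the device's maximum unmap length when the handler
 * prefers to break up unmaps itself; it then schedules one tcmur_cmd
 * per chunk via unmap_work_fn(), each holding its own unmap_state
 * reference.
 */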
287 static int align_and_split_unmap(struct tcmu_device *dev,
288 struct tcmur_cmd *tcmur_cmd,
289 uint64_t lba, uint64_t nlbas)
290 {
291 struct unmap_state *state = tcmur_cmd->cmd_state;
292 uint64_t opt_unmap_gran;
293 uint64_t unmap_gran_align, mask;
294 int ret = TCMU_STS_NOT_HANDLED;
295 int j = 0;
296 struct unmap_descriptor *desc;
297 struct tcmur_cmd *tcmur_ucmd;
298 uint64_t lbas;
299
300 if (!dev->split_unmaps) {
301 /*
302 * Handler does not support vectored unmaps, but prefers to
303 * break up unmaps itself, so pass the entire segment to it.
304 */
305 opt_unmap_gran = tcmu_dev_get_max_unmap_len(dev);
306 mask = 0;
307 } else {
308 /*
309 * Align the start lba of an unmap request and split the
310 * large number of blocks into OPTIMAL UNMAP GRANULARITY sized pieces.
311 *
312 * NOTE: here we always assume the OPTIMAL UNMAP GRANULARITY
313 * equals the UNMAP GRANULARITY ALIGNMENT to simplify the
314 * algorithm. In the future, for new devices that have different
315 * values the following align and split algorithm should be
316 * changed.
317 */
318
319 /* OPTIMAL UNMAP GRANULARITY */
320 opt_unmap_gran = tcmu_dev_get_opt_unmap_gran(dev);
321
322 /* UNMAP GRANULARITY ALIGNMENT */
323 unmap_gran_align = tcmu_dev_get_unmap_gran_align(dev);
324 mask = unmap_gran_align - 1;
325 }
326
327 lbas = opt_unmap_gran - (lba & mask);
328 lbas = min(lbas, nlbas);
329
330 tcmu_dev_dbg(dev, "OPTIMAL UNMAP GRANULARITY: %"PRIu64", UNMAP GRANULARITY ALIGNMENT mask: %"PRIu64", lbas: %"PRIu64"\n",
331 opt_unmap_gran, mask, lbas);
332
333 while (nlbas) {
334 desc = calloc(1, sizeof(*desc));
335 if (!desc) {
336 tcmu_dev_err(dev, "Failed to calloc desc!\n");
337 return TCMU_STS_NO_RESOURCE;
338 }
339 desc->offset = tcmu_lba_to_byte(dev, lba);
340 desc->length = tcmu_lba_to_byte(dev, lbas);
341
342 tcmur_ucmd = calloc(1, sizeof(*tcmur_ucmd));
343 if (!tcmur_ucmd) {
344 tcmu_dev_err(dev, "Failed to calloc unmap cmd!\n");
345 ret = TCMU_STS_NO_RESOURCE;
346 goto free_desc;
347 }
348 tcmur_ucmd->cmd_state = desc;
349 tcmur_ucmd->lib_cmd = tcmur_cmd->lib_cmd;
350 tcmur_ucmd->done = handle_unmap_cbk;
351
352 /* The first one */
353 if (j++ == 0)
354 tcmu_dev_dbg(dev, "The first split: start lba: %"PRIu64", end lba: %"PRIu64", lbas: %"PRIu64"\n",
355 lba, lba + lbas - 1, lbas);
356
357 /* The last one */
358 if (nlbas == lbas) {
359 tcmu_dev_dbg(dev, "The last split: start lba: %"PRIu64", end lba: %"PRIu64", lbas: %"PRIu64"\n",
360 lba, lba + lbas - 1, lbas);
361 tcmu_dev_dbg(dev, "There are totally %d splits\n", j);
362 }
363
364 pthread_mutex_lock(&state->lock);
365 state->refcount++;
366 pthread_mutex_unlock(&state->lock);
367
368 ret = aio_request_schedule(dev, tcmur_ucmd, unmap_work_fn,
369 tcmur_cmd_complete);
370 if (ret != TCMU_STS_ASYNC_HANDLED)
371 goto free_ucmd;
372
373 nlbas -= lbas;
374 lba += lbas;
375 lbas = min(opt_unmap_gran, nlbas);
376 }
377
378 return ret;
379
380 free_ucmd:
381 pthread_mutex_lock(&state->lock);
382 state->refcount--;
383 pthread_mutex_unlock(&state->lock);
384 free(tcmur_ucmd);
385 free_desc:
386 free(desc);
387 return ret;
388 }
389
390 static int handle_unmap_internal(struct tcmu_device *dev, struct tcmulib_cmd *cmd,
391 uint16_t bddl, uint8_t *par)
392 {
393 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
394 struct unmap_state *state;
395 uint16_t offset = 0;
396 int ret = TCMU_STS_OK, i = 0;
397
398 ret = unmap_init(dev, cmd);
399 if (ret)
400 return ret;
401 state = tcmur_cmd->cmd_state;
402
403 /* The first UNMAP block descriptor starts at offset 8 in the Data-Out buffer */
404 par += 8;
405 while (bddl) {
406 uint64_t lba;
407 uint64_t nlbas;
408
409 lba = be64toh(*((uint64_t *)&par[offset]));
410 nlbas = be32toh(*((uint32_t *)&par[offset + 8]));
411
412 tcmu_dev_dbg(dev, "Parameter list %d, start lba: %"PRIu64", end lba: %"PRIu64", nlbas: %"PRIu64"\n",
413 i++, lba, lba + nlbas - 1, nlbas);
414
415 if (nlbas > tcmu_dev_get_max_unmap_len(dev)) {
416 tcmu_dev_err(dev, "Illegal parameter list LBA count %"PRIu64" exceeds:%u\n",
417 nlbas, tcmu_dev_get_max_unmap_len(dev));
418 ret = TCMU_STS_INVALID_PARAM_LIST;
419 goto done;
420 }
421
422 ret = check_lbas(dev, lba, nlbas);
423 if (ret)
424 goto done;
425
426 if (nlbas) {
427 ret = align_and_split_unmap(dev, tcmur_cmd, lba, nlbas);
428 if (ret != TCMU_STS_ASYNC_HANDLED)
429 goto done;
430 }
431
432 /* The unmap block descriptor data length is 16 */
433 offset += 16;
434 bddl -= 16;
435 }
436
437 done:
438 /*
439 * unmap_put will do the right thing, so always return
440 * TCMU_STS_ASYNC_HANDLED
441 */
442 pthread_mutex_lock(&state->lock);
443 if (ret == TCMU_STS_ASYNC_HANDLED) {
444 ret = TCMU_STS_OK;
445 } else {
446 state->status = ret;
447 }
448 pthread_mutex_unlock(&state->lock);
449
450 unmap_put(dev, cmd, ret);
451 return TCMU_STS_ASYNC_HANDLED;
452 }
453
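/*
 * Note: handle_unmap() copies the Data-Out buffer and parses the UNMAP
 * parameter list header (UNMAP DATA LENGTH at bytes 0-1, UNMAP BLOCK
 * DESCRIPTOR DATA LENGTH at bytes 2-3); handle_unmap_internal() then
 * walks the 16-byte block descriptors starting at offset 8, each holding
 * an 8-byte UNMAP LOGICAL BLOCK ADDRESS and a 4-byte NUMBER OF LOGICAL
 * BLOCKS.
 */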
454 static int handle_unmap(struct tcmu_device *dev, struct tcmulib_cmd *origcmd)
455 {
456 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
457 uint8_t *cdb = origcmd->cdb;
458 size_t copied, data_length = tcmu_cdb_get_xfer_length(cdb);
459 uint8_t *par;
460 uint16_t dl, bddl;
461 int ret;
462
463 if (!rhandler->unmap)
464 return TCMU_STS_INVALID_CMD;
465
466 /*
467 * ANCHOR bit check
468 *
469 * The ANCHOR in the Logical Block Provisioning VPD page is not
470 * supported, so the ANCHOR bit shouldn't be set here.
471 */
472 if (cdb[1] & 0x01) {
473 tcmu_dev_err(dev, "Illegal request: anchor is not supported for now!\n");
474 return TCMU_STS_INVALID_CDB;
475 }
476
477 /*
478 * PARAMETER LIST LENGTH field.
479 *
480 * The PARAMETER LIST LENGTH field specifies the length in bytes of
481 * the UNMAP parameter data that shall be sent from the application
482 * client to the device server.
483 *
484 * A PARAMETER LIST LENGTH set to zero specifies that no data shall
485 * be sent.
486 */
487 if (!data_length) {
488 tcmu_dev_dbg(dev, "Data-Out Buffer length is zero, just return okay\n");
489 return TCMU_STS_OK;
490 }
491
492 /*
493 * From sbc4r13, section 5.32.1 UNMAP command overview.
494 *
495 * The PARAMETER LIST LENGTH should be at least eight.
496 */
497 if (data_length < 8) {
498 tcmu_dev_err(dev, "Illegal parameter list length %zu and it should be >= 8\n",
499 data_length);
500 return TCMU_STS_INVALID_PARAM_LIST_LEN;
501 }
502
503 par = calloc(1, data_length);
504 if (!par) {
505 tcmu_dev_err(dev, "The state parameter is NULL!\n");
506 return TCMU_STS_NO_RESOURCE;
507 }
508 copied = tcmu_memcpy_from_iovec(par, data_length, origcmd->iovec,
509 origcmd->iov_cnt);
510 if (copied != data_length) {
511 tcmu_dev_err(dev, "Failed to copy the Data-Out Buffer !\n");
512 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
513 goto out_free_par;
514 }
515
516 /*
517 * If any UNMAP block descriptors in the UNMAP block descriptor
518 * list are truncated due to the parameter list length in the CDB,
519 * then that UNMAP block descriptor shall be ignored.
520 *
521 * So it will allow dl + 2 != data_length and bddl + 8 != data_length.
522 */
523 dl = be16toh(*((uint16_t *)&par[0]));
524 bddl = be16toh(*((uint16_t *)&par[2]));
525
526 tcmu_dev_dbg(dev, "Data-Out Buffer Length: %zu, dl: %hu, bddl: %hu\n",
527 data_length, dl, bddl);
528
529 /*
530 * If the unmap block descriptor data length is not a multiple
531 * of 16, then the last unmap block descriptor is incomplete
532 * and shall be ignored.
533 */
534 bddl &= ~0xF;
535
536 /*
537 * If the UNMAP BLOCK DESCRIPTOR DATA LENGTH is set to zero, then
538 * no unmap block descriptors are included in the UNMAP parameter
539 * list.
540 */
541 if (!bddl) {
542 ret = TCMU_STS_OK;
543 goto out_free_par;
544 }
545
546 if (bddl / 16 > VPD_MAX_UNMAP_BLOCK_DESC_COUNT) {
547 tcmu_dev_err(dev, "Illegal parameter list count %hu exceeds :%u\n",
548 bddl / 16, VPD_MAX_UNMAP_BLOCK_DESC_COUNT);
549 ret = TCMU_STS_INVALID_PARAM_LIST;
550 goto out_free_par;
551 }
552
553 ret = handle_unmap_internal(dev, origcmd, bddl, par);
554
555 out_free_par:
556 free(par);
557 return ret;
558 }
559
560 struct write_same {
561 uint64_t cur_lba;
562 uint64_t lba_cnt;
563 };
564
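/*
 * Note: when the handler has no writesame callout, handle_writesame()
 * fills a bounce buffer (nominally 1 MiB, rounded up to a multiple of
 * the device's maximum transfer length and capped at the total
 * write-same length) with copies of the single-block Data-Out payload,
 * and writesame_work_fn()/handle_writesame_cbk() below write it out
 * chunk by chunk, advancing cur_lba until lba_cnt reaches zero.
 */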
565 static int writesame_work_fn(struct tcmu_device *dev, void *data)
566 {
567 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
568 struct tcmur_cmd *tcmur_cmd = data;
569 struct write_same *write_same = tcmur_cmd->cmd_state;
570
571 tcmur_cmd_iovec_reset(tcmur_cmd, tcmur_cmd->requested);
572 /*
573 * Write contents of the logical block data (from the Data-Out Buffer)
574 * to each LBA in the specified LBA range.
575 */
576 return rhandler->write(dev, tcmur_cmd, tcmur_cmd->iovec,
577 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
578 tcmu_lba_to_byte(dev, write_same->cur_lba));
579 }
580
581 static void handle_writesame_cbk(struct tcmu_device *dev,
582 struct tcmur_cmd *tcmur_cmd, int ret)
583 {
584 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
585 struct write_same *write_same = tcmur_cmd->cmd_state;
586 uint64_t write_lbas = tcmu_byte_to_lba(dev, tcmur_cmd->requested);
587 uint64_t left_lbas;
588 int rc;
589
590 /* write failed - bail out */
591 if (ret != TCMU_STS_OK)
592 goto finish_err;
593
594 write_same->cur_lba += write_lbas;
595 write_same->lba_cnt -= write_lbas;
596 left_lbas = write_same->lba_cnt;
597
598 if (!left_lbas)
599 goto finish_err;
600
601 if (left_lbas <= write_lbas) {
602 tcmu_dev_dbg(dev, "Last lba: %"PRIu64", write lbas: %"PRIu64"\n",
603 write_same->cur_lba, left_lbas);
604
605 tcmur_cmd->requested = tcmu_lba_to_byte(dev, left_lbas);
606 } else {
607 tcmu_dev_dbg(dev, "Next lba: %"PRIu64", write lbas: %"PRIu64"\n",
608 write_same->cur_lba, write_lbas);
609 }
610
611 rc = aio_request_schedule(dev, tcmur_cmd, writesame_work_fn,
612 tcmur_cmd_complete);
613 if (rc != TCMU_STS_ASYNC_HANDLED) {
614 tcmu_dev_err(dev, "Write same async handle cmd failure\n");
615 ret = TCMU_STS_WR_ERR;
616 goto finish_err;
617 }
618
619 return;
620
621 finish_err:
622 tcmur_cmd_state_free(tcmur_cmd);
623 aio_command_finish(dev, cmd, ret);
624 }
625
626 static int handle_writesame_check(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
627 {
628 uint8_t *cdb = cmd->cdb;
629 uint32_t lba_cnt = tcmu_cdb_get_xfer_length(cdb);
630 uint32_t block_size = tcmu_dev_get_block_size(dev);
631 uint64_t start_lba = tcmu_cdb_get_lba(cdb);
632 int ret;
633
634 if (cmd->iov_cnt != 1 || cmd->iovec->iov_len != block_size) {
635 tcmu_dev_err(dev, "Illegal Data-Out: iov_cnt %zu length: %zu\n",
636 cmd->iov_cnt, cmd->iovec->iov_len);
637 return TCMU_STS_INVALID_CDB;
638 }
639
640 /*
641 * From sbc4r13, section 5.50 WRITE SAME (16) command
642 *
643 * A write same (WSNZ) bit has been set to one, so the device server
644 * won't support a value of zero here.
645 */
646 if (!lba_cnt) {
647 tcmu_dev_err(dev, "The WSNZ = 1 & WRITE_SAME blocks = 0 is not supported!\n");
648 return TCMU_STS_INVALID_CDB;
649 }
650
651 /*
652 * The MAXIMUM WRITE SAME LENGTH field in Block Limits VPD page (B0h)
653 * limits the maximum number of blocks for the WRITE SAME.
654 */
655 if (lba_cnt > VPD_MAX_WRITE_SAME_LENGTH) {
656 tcmu_dev_err(dev, "blocks: %u exceeds MAXIMUM WRITE SAME LENGTH: %u\n",
657 lba_cnt, VPD_MAX_WRITE_SAME_LENGTH);
658 return TCMU_STS_INVALID_CDB;
659 }
660
661 /*
662 * The logical block address plus the number of blocks shouldn't
663 * exceed the capacity of the medium.
664 */
665 ret = check_lbas(dev, start_lba, lba_cnt);
666 if (ret)
667 return ret;
668
669 tcmu_dev_dbg(dev, "Start lba: %"PRIu64", number of lba: %u, last lba: %"PRIu64"\n",
670 start_lba, lba_cnt, start_lba + lba_cnt - 1);
671
672 return TCMU_STS_OK;
673 }
674
675 static int handle_unmap_in_writesame(struct tcmu_device *dev,
676 struct tcmulib_cmd *cmd)
677 {
678 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
679 uint8_t *cdb = cmd->cdb;
680 uint64_t lba = tcmu_cdb_get_lba(cdb);
681 uint64_t nlbas = tcmu_cdb_get_xfer_length(cdb);
682 uint32_t align = tcmu_dev_get_unmap_gran_align(dev);
683 struct unmap_state *state;
684 int ret;
685
686 /* If not aligned then fall back to the writesame without unmap */
687 if (lba % align || nlbas % align) {
688 tcmu_dev_dbg(dev,
689 "Start lba: %"PRIu64" or nlbas: %"PRIu64" not aligned to %"PRIu32"\n",
690 lba, nlbas, align);
691 tcmu_dev_dbg(dev, "Falls back to writesame without unmap!\n");
692 return TCMU_STS_NOT_HANDLED;
693 }
694
695 tcmu_dev_dbg(dev, "Do UNMAP in WRITE_SAME cmd!\n");
696
697 ret = unmap_init(dev, cmd);
698 if (ret)
699 return ret;
700 state = tcmur_cmd->cmd_state;
701
702 ret = align_and_split_unmap(dev, tcmur_cmd, lba, nlbas);
703 if (ret == TCMU_STS_ASYNC_HANDLED) {
704 ret = TCMU_STS_OK;
705 } else {
706 state->status = ret;
707 }
708
709 unmap_put(dev, cmd, ret);
710 return TCMU_STS_ASYNC_HANDLED;
711 }
712
713 static int tcmur_writesame_work_fn(struct tcmu_device *dev, void *data)
714 {
715 struct tcmur_cmd *tcmur_cmd = data;
716 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
717 tcmur_writesame_fn_t write_same_fn = tcmur_cmd->cmd_state;
718 uint8_t *cdb = cmd->cdb;
719 uint64_t off = tcmu_cdb_to_byte(dev, cdb);
720 uint64_t len = tcmu_lba_to_byte(dev, tcmu_cdb_get_xfer_length(cdb));
721
722 /*
723 * Write contents of the logical block data (from the Data-Out Buffer)
724 * to each LBA in the specified LBA range.
725 */
726 return write_same_fn(dev, tcmur_cmd, off, len, cmd->iovec,
727 cmd->iov_cnt);
728 }
729
730 static int handle_writesame(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
731 {
732 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
733 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
734 uint8_t *cdb = cmd->cdb;
735 uint32_t lba_cnt = tcmu_cdb_get_xfer_length(cdb);
736 uint32_t block_size = tcmu_dev_get_block_size(dev);
737 uint64_t start_lba = tcmu_cdb_get_lba(cdb);
738 uint64_t write_lbas;
739 size_t max_xfer_length, length = 1024 * 1024;
740 struct write_same *write_same;
741 int i, ret;
742
743 if (tcmu_dev_in_recovery(dev))
744 return TCMU_STS_BUSY;
745
746 ret = alua_check_state(dev, cmd, false);
747 if (ret)
748 return ret;
749
750 ret = handle_writesame_check(dev, cmd);
751 if (ret)
752 return ret;
753
754 if (rhandler->unmap && (cmd->cdb[1] & 0x08)) {
755 ret = handle_unmap_in_writesame(dev, cmd);
756 if (ret != TCMU_STS_NOT_HANDLED)
757 return ret;
758 }
759
760 if (rhandler->writesame) {
761 tcmur_cmd->cmd_state = rhandler->writesame;
762 tcmur_cmd->done = handle_generic_cbk;
763 return aio_request_schedule(dev, tcmur_cmd,
764 tcmur_writesame_work_fn,
765 tcmur_cmd_complete);
766 }
767
768 max_xfer_length = tcmu_dev_get_max_xfer_len(dev) * block_size;
769 length = round_up(length, max_xfer_length);
770 length = min(length, tcmu_lba_to_byte(dev, lba_cnt));
771
772 if (tcmur_cmd_state_init(tcmur_cmd, sizeof(*write_same), length)) {
773 tcmu_dev_err(dev, "Failed to calloc write_same data!\n");
774 return TCMU_STS_NO_RESOURCE;
775 }
776 tcmur_cmd->done = handle_writesame_cbk;
777
778 write_lbas = tcmu_byte_to_lba(dev, length);
779 for (i = 0; i < write_lbas; i++)
780 memcpy(tcmur_cmd->iovec->iov_base + i * block_size,
781 cmd->iovec->iov_base, block_size);
782
783 write_same = tcmur_cmd->cmd_state;
784 write_same->cur_lba = start_lba;
785 write_same->lba_cnt = lba_cnt;
786
787 tcmu_dev_dbg(dev, "First lba: %"PRIu64", write lbas: %"PRIu64"\n",
788 start_lba, write_lbas);
789
790 return aio_request_schedule(dev, tcmur_cmd, writesame_work_fn,
791 tcmur_cmd_complete);
792 }
793
794 /* async write verify */
795 struct write_verify_state {
796 struct iovec *w_iovec;
797 size_t w_iov_cnt;
798 };
799
800 static void handle_write_verify_read_cbk(struct tcmu_device *dev,
801 struct tcmur_cmd *tcmur_cmd, int ret)
802 {
803 struct write_verify_state *state = tcmur_cmd->cmd_state;
804 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
805 uint32_t cmp_offset;
806
807 /* failed read - bail out */
808 if (ret != TCMU_STS_OK)
809 goto done;
810
811 ret = TCMU_STS_OK;
812 cmp_offset = tcmu_iovec_compare(tcmur_cmd->iov_base_copy,
813 state->w_iovec, tcmur_cmd->requested);
814 if (cmp_offset != -1) {
815 tcmu_dev_err(dev, "Verify failed at offset %u\n", cmp_offset);
816 ret = TCMU_STS_MISCOMPARE;
817 tcmu_sense_set_info(cmd->sense_buf, cmp_offset);
818 }
819
820 done:
821 tcmur_cmd_state_free(tcmur_cmd);
822 aio_command_finish(dev, cmd, ret);
823 }
824
825 static int write_verify_read_work_fn(struct tcmu_device *dev, void *data)
826 {
827 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
828 struct tcmur_cmd *tcmur_cmd = data;
829 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
830
831 return rhandler->read(dev, tcmur_cmd, tcmur_cmd->iovec,
832 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
833 tcmu_cdb_to_byte(dev, cmd->cdb));
834 }
835
836 static void handle_write_verify_write_cbk(struct tcmu_device *dev,
837 struct tcmur_cmd *tcmur_cmd,
838 int ret)
839 {
840 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
841
842 /* write error - bail out */
843 if (ret != TCMU_STS_OK)
844 goto finish_err;
845
846 tcmur_cmd->done = handle_write_verify_read_cbk;
847
848 ret = aio_request_schedule(dev, tcmur_cmd, write_verify_read_work_fn,
849 tcmur_cmd_complete);
850 if (ret != TCMU_STS_ASYNC_HANDLED)
851 goto finish_err;
852 return;
853
854 finish_err:
855 tcmur_cmd_state_free(tcmur_cmd);
856 aio_command_finish(dev, cmd, ret);
857 }
858
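/*
 * Note: WRITE AND VERIFY is emulated as two chained async requests: the
 * write is issued first via write_work_fn(), then
 * handle_write_verify_write_cbk() schedules a read back into the bounce
 * buffer, and handle_write_verify_read_cbk() compares it against the
 * saved copy of the original write iovec, reporting MISCOMPARE with the
 * failing offset in the sense data.
 */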
859 static int handle_write_verify(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
860 {
861 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
862 uint8_t *cdb = cmd->cdb;
863 size_t length = tcmu_lba_to_byte(dev, tcmu_cdb_get_xfer_length(cdb));
864 struct write_verify_state *state;
865 int i, ret, state_len;
866
867 ret = check_lba_and_length(dev, cmd, tcmu_cdb_get_xfer_length(cdb));
868 if (ret)
869 return ret;
870
871 state_len = sizeof(*state) + (cmd->iov_cnt * sizeof(struct iovec));
872
873 if (tcmur_cmd_state_init(tcmur_cmd, state_len, length))
874 return TCMU_STS_NO_RESOURCE;
875 tcmur_cmd->done = handle_write_verify_write_cbk;
876
877 state = tcmur_cmd->cmd_state;
878 /*
879 * Copy cmd iovec for later comparison in case handler modifies
880 * pointers/lens.
881 */
882 state->w_iovec = (void *)state + sizeof(*state);
883 state->w_iov_cnt = cmd->iov_cnt;
884 for (i = 0; i < cmd->iov_cnt; i++) {
885 state->w_iovec[i].iov_base = cmd->iovec[i].iov_base;
886 state->w_iovec[i].iov_len = cmd->iovec[i].iov_len;
887 }
888
889 ret = aio_request_schedule(dev, tcmur_cmd, write_work_fn,
890 tcmur_cmd_complete);
891 if (ret != TCMU_STS_ASYNC_HANDLED)
892 goto free_state;
893
894 return TCMU_STS_ASYNC_HANDLED;
895
896 free_state:
897 tcmur_cmd_state_free(tcmur_cmd);
898 return ret;
899 }
900
901 #define XCOPY_HDR_LEN 16
902 #define XCOPY_TARGET_DESC_LEN 32
903 #define XCOPY_SEGMENT_DESC_B2B_LEN 28
904 #define XCOPY_NAA_IEEE_REGEX_LEN 16
905
906 struct xcopy {
907 struct tcmu_device *origdev;
908 struct tcmu_device *src_dev;
909 uint8_t src_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
910 struct tcmu_device *dst_dev;
911 uint8_t dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
912
913 uint64_t src_lba;
914 uint64_t dst_lba;
915 uint32_t stdi;
916 uint32_t dtdi;
917 uint32_t lba_cnt;
918 uint32_t copy_lbas;
919 };
920
921 /* For now only the block -> block type is supported */
922 static int xcopy_parse_segment_descs(uint8_t *seg_descs, struct xcopy *xcopy,
923 uint8_t sdll)
924 {
925 uint8_t *seg_desc = seg_descs;
926 uint8_t desc_len;
927
928 /*
929 * From spc4r31, section 6.3.7.5 Block device to block device
930 * operations
931 *
932 * The segment descriptor size should be 28 bytes
933 */
934 if (sdll % XCOPY_SEGMENT_DESC_B2B_LEN != 0) {
935 tcmu_err("Illegal block --> block type segment descriptor length %u\n",
936 sdll);
937 return TCMU_STS_INVALID_PARAM_LIST;
938 }
939
940 /* From spc4r36q, section 6.4.3.5 SEGMENT DESCRIPTOR LIST LENGTH field
941 * If the number of segment descriptors exceeds the allowed number, the copy
942 * manager shall terminate the command with CHECK CONDITION status, with the
943 * sense key set to ILLEGAL REQUEST, and the additional sense code set to
944 * TOO MANY SEGMENT DESCRIPTORS.
945 */
946 if (sdll > RCR_OP_MAX_SEGMENT_DESC_COUNT * XCOPY_SEGMENT_DESC_B2B_LEN) {
947 tcmu_err("Only %u segment descriptor(s) supported, but there are %u\n",
948 RCR_OP_MAX_SEGMENT_DESC_COUNT,
949 sdll / XCOPY_SEGMENT_DESC_B2B_LEN);
950 return TCMU_STS_TOO_MANY_SEG_DESC;
951 }
952
953 /* EXTENDED COPY segment descriptor type codes block --> block */
954 if (seg_desc[0] != XCOPY_SEG_DESC_TYPE_CODE_B2B) {
955 tcmu_err("Unsupport segment descriptor type code 0x%x\n",
956 seg_desc[0]);
957 return TCMU_STS_NOTSUPP_SEG_DESC_TYPE;
958 }
959
960 /*
961 * For block -> block type the length is 4-byte header + 0x18-byte
962 * data.
963 */
964 desc_len = be16toh(*(uint16_t *)&seg_desc[2]);
965 if (desc_len != 0x18) {
966 tcmu_err("Invalid length for block->block type 0x%x\n",
967 desc_len);
968 return TCMU_STS_INVALID_PARAM_LIST;
969 }
970
971 /*
972 * From spc4r31, section 6.3.7.1 Segment descriptors introduction
973 *
974 * The SOURCE TARGET DESCRIPTOR INDEX field contains an index into
975 * the target descriptor list (see 6.3.1) identifying the source
976 * copy target device. The DESTINATION TARGET DESCRIPTOR INDEX field
977 * contains an index into the target descriptor list (see 6.3.1)
978 * identifying the destination copy target device.
979 */
980 xcopy->stdi = be16toh(*(uint16_t *)&seg_desc[4]);
981 xcopy->dtdi = be16toh(*(uint16_t *)&seg_desc[6]);
982 tcmu_dbg("Segment descriptor: stdi: %hu dtdi: %hu\n", xcopy->stdi,
983 xcopy->dtdi);
984
985 xcopy->lba_cnt = be16toh(*(uint16_t *)&seg_desc[10]);
986 xcopy->src_lba = be64toh(*(uint64_t *)&seg_desc[12]);
987 xcopy->dst_lba = be64toh(*(uint64_t *)&seg_desc[20]);
988 tcmu_dbg("Segment descriptor: lba_cnt: %u src_lba: %"PRIu64" dst_lba: %"PRIu64"\n",
989 xcopy->lba_cnt, xcopy->src_lba, xcopy->dst_lba);
990
991 return TCMU_STS_OK;
992 }
993
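/*
 * Note: xcopy_gen_naa_ieee() builds the 16-byte NAA type 6 (IEEE
 * Registered Extended) designator from the OpenFabrics IEEE company ID
 * and the device's configfs WWN; xcopy_parse_target_id() and
 * xcopy_locate_udev() match this designator against the NAA field of
 * each CSCD target descriptor to resolve the source and destination
 * tcmu devices.
 */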
994 static int xcopy_gen_naa_ieee(struct tcmu_device *udev, uint8_t *wwn)
995 {
996 char *buf, *p;
997 bool next = true;
998 int ind = 0;
999
1000 /* Set type 6 and use OpenFabrics IEEE Company ID: 00 14 05 */
1001 wwn[ind++] = 0x60;
1002 wwn[ind++] = 0x01;
1003 wwn[ind++] = 0x40;
1004 wwn[ind] = 0x50;
1005
1006 /* Parse the udev vpd unit serial number */
1007 buf = tcmu_cfgfs_dev_get_wwn(udev);
1008 if (!buf)
1009 return -1;
1010 p = buf;
1011
1012 /*
1013 * Generate up to 36 bits of VENDOR SPECIFIC IDENTIFIER starting on
1014 * byte 3 bit 3-0 for NAA IEEE Registered Extended DESIGNATOR field
1015 * format, followed by 64 bits of VENDOR SPECIFIC IDENTIFIER EXTENSION
1016 * to complete the payload. These are based on the VPD=0x80 PRODUCT SERIAL
1017 * NUMBER set via vpd_unit_serial in target_core_configfs.c to ensure
1018 * per device uniqueness.
1019 */
1020 for (; *p && ind < XCOPY_NAA_IEEE_REGEX_LEN; p++) {
1021 uint8_t val;
1022
1023 if (!char_to_hex(&val, *p))
1024 continue;
1025
1026 if (next) {
1027 next = false;
1028 wwn[ind++] |= val;
1029 } else {
1030 next = true;
1031 wwn[ind] = val << 4;
1032 }
1033 }
1034
1035 free(buf);
1036 return TCMU_STS_OK;
1037 }
1038
1039 static int xcopy_locate_udev(struct tcmulib_context *ctx,
1040 const uint8_t *dev_wwn,
1041 struct tcmu_device **udev)
1042 {
1043 struct tcmu_device **dev_ptr;
1044 struct tcmu_device *dev;
1045 uint8_t wwn[XCOPY_NAA_IEEE_REGEX_LEN];
1046
1047 darray_foreach(dev_ptr, ctx->devices) {
1048 dev = *dev_ptr;
1049
1050 memset(wwn, 0, XCOPY_NAA_IEEE_REGEX_LEN);
1051 if (xcopy_gen_naa_ieee(dev, wwn))
1052 return -1;
1053
1054 if (memcmp(wwn, dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
1055 continue;
1056
1057 *udev = dev;
1058 tcmu_dev_dbg(dev, "Located tcmu devivce: %s\n",
1059 dev->tcm_dev_name);
1060
1061 return 0;
1062 }
1063
1064 return -1;
1065 }
1066
1067 /* Identification descriptor target */
1068 static int xcopy_parse_target_id(struct tcmu_device *udev,
1069 struct xcopy *xcopy,
1070 uint8_t *tgt_desc,
1071 int32_t index)
1072 {
1073 uint8_t wwn[XCOPY_NAA_IEEE_REGEX_LEN];
1074
1075 /*
1076 * Generate an IEEE Registered Extended designator based upon the
1077 * device the XCOPY specified.
1078 */
1079 memset(wwn, 0, XCOPY_NAA_IEEE_REGEX_LEN);
1080 if (xcopy_gen_naa_ieee(udev, wwn))
1081 return TCMU_STS_HW_ERR;
1082
1083 /*
1084 * CODE SET: for now only binary type code is supported.
1085 */
1086 if ((tgt_desc[4] & 0x0f) != 0x1) {
1087 tcmu_dev_err(udev, "Id target CODE DET only support binary type!\n");
1088 return TCMU_STS_INVALID_PARAM_LIST;
1089 }
1090
1091 /*
1092 * ASSOCIATION: for now only LUN type code is supported.
1093 */
1094 if ((tgt_desc[5] & 0x30) != 0x00) {
1095 tcmu_dev_err(udev, "Id target ASSOCIATION other than LUN not supported!\n");
1096 return TCMU_STS_INVALID_PARAM_LIST;
1097 }
1098
1099 /*
1100 * DESIGNATOR TYPE: for now only NAA type code is supported.
1101 *
1102 * For the designator type definitions see, for example,
1103 * spc4r31, section 7.8.6.1 Device Identification VPD page
1104 * overview.
1105 */
1106 if ((tgt_desc[5] & 0x0f) != 0x3) {
1107 tcmu_dev_err(udev, "Id target DESIGNATOR TYPE other than NAA not supported!\n");
1108 return TCMU_STS_INVALID_PARAM_LIST;
1109 }
1110 /*
1111 * Check for matching 16 byte length for NAA IEEE Registered Extended
1112 * Assigned designator
1113 */
1114 if (tgt_desc[7] != 16) {
1115 tcmu_dev_err(udev, "Id target DESIGNATOR LENGTH should be 16, but it's: %d\n",
1116 tgt_desc[7]);
1117 return TCMU_STS_INVALID_PARAM_LIST;
1118 }
1119
1120 /*
1121 * Check for NAA IEEE Registered Extended Assigned header.
1122 */
1123 if ((tgt_desc[8] >> 4) != 0x06) {
1124 tcmu_dev_err(udev, "Id target NAA designator type: 0x%x\n",
1125 tgt_desc[8] >> 4);
1126 return TCMU_STS_INVALID_PARAM_LIST;
1127 }
1128
1129 /*
1130 * Source designator matches the local device
1131 */
1132 if (index == xcopy->stdi) {
1133 memcpy(&xcopy->src_tid_wwn[0], &tgt_desc[8],
1134 XCOPY_NAA_IEEE_REGEX_LEN);
1135
1136 if (!memcmp(wwn, xcopy->src_tid_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
1137 xcopy->src_dev = udev;
1138 }
1139
1140 /*
1141 * Destination designator matches the local device.
1142 */
1143 if (index == xcopy->dtdi) {
1144 memcpy(xcopy->dst_tid_wwn, &tgt_desc[8],
1145 XCOPY_NAA_IEEE_REGEX_LEN);
1146
1147 if (!memcmp(wwn, xcopy->dst_tid_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
1148 xcopy->dst_dev = udev;
1149 }
1150
1151 return TCMU_STS_OK;
1152 }
1153
1154 static int xcopy_parse_target_descs(struct tcmu_device *udev,
1155 struct xcopy *xcopy,
1156 uint8_t *tgt_desc,
1157 uint16_t tdll)
1158 {
1159 int i, ret;
1160
1161 if (tdll % XCOPY_TARGET_DESC_LEN) {
1162 tcmu_dev_err(udev,
1163 "CSCD descriptor list length %u not a multiple of %u\n",
1164 (unsigned int)tdll, XCOPY_TARGET_DESC_LEN);
1165 return TCMU_STS_NOTSUPP_TGT_DESC_TYPE;
1166 }
1167 /* From spc4r36q, section 6.4.3.4 CSCD DESCRIPTOR LIST LENGTH field
1168 * If the number of CSCD descriptors exceeds the allowed number, the copy
1169 * manager shall terminate the command with CHECK CONDITION status, with
1170 * the sense key set to ILLEGAL REQUEST, and the additional sense code
1171 * set to TOO MANY TARGET DESCRIPTORS.
1172 */
1173 if (tdll > RCR_OP_MAX_TARGET_DESC_COUNT * XCOPY_TARGET_DESC_LEN) {
1174 tcmu_dev_err(udev, "Only %u target descriptor(s) supported, but there are %u\n",
1175 RCR_OP_MAX_TARGET_DESC_COUNT, tdll / XCOPY_TARGET_DESC_LEN);
1176 return TCMU_STS_TOO_MANY_TGT_DESC;
1177 }
1178
1179 for (i = 0; tdll >= XCOPY_TARGET_DESC_LEN; i++) {
1180 /*
1181 * Only the Identification Descriptor target descriptor is
1182 * supported for now.
1183 */
1184 if (tgt_desc[0] == XCOPY_TARGET_DESC_TYPE_CODE_ID) {
1185 ret = xcopy_parse_target_id(udev, xcopy, tgt_desc, i);
1186 if (ret != TCMU_STS_OK)
1187 return ret;
1188
1189 tgt_desc += XCOPY_TARGET_DESC_LEN;
1190 tdll -= XCOPY_TARGET_DESC_LEN;
1191 } else {
1192 tcmu_dev_err(udev, "Unsupport target descriptor type code 0x%x\n",
1193 tgt_desc[0]);
1194 return TCMU_STS_NOTSUPP_TGT_DESC_TYPE;
1195 }
1196 }
1197
1198 ret = TCMU_STS_CP_TGT_DEV_NOTCONN;
1199 if (xcopy->src_dev)
1200 ret = xcopy_locate_udev(udev->ctx, xcopy->dst_tid_wwn,
1201 &xcopy->dst_dev);
1202 else if (xcopy->dst_dev)
1203 ret = xcopy_locate_udev(udev->ctx, xcopy->src_tid_wwn,
1204 &xcopy->src_dev);
1205
1206 if (ret) {
1207 tcmu_err("Target device not found, the index are %hu and %hu\n",
1208 xcopy->stdi, xcopy->dtdi);
1209 return TCMU_STS_CP_TGT_DEV_NOTCONN;
1210 }
1211
1212 tcmu_dev_dbg(xcopy->src_dev, "Source device NAA IEEE WWN: 0x%16phN\n",
1213 xcopy->src_tid_wwn);
1214 tcmu_dev_dbg(xcopy->dst_dev, "Destination device NAA IEEE WWN: 0x%16phN\n",
1215 xcopy->dst_tid_wwn);
1216
1217 return TCMU_STS_OK;
1218 }
1219
1220 static int xcopy_parse_parameter_list(struct tcmu_device *dev,
1221 struct tcmulib_cmd *cmd,
1222 struct xcopy *xcopy)
1223 {
1224 uint8_t *cdb = cmd->cdb;
1225 size_t data_length = tcmu_cdb_get_xfer_length(cdb);
1226 struct iovec *iovec = cmd->iovec;
1227 size_t iov_cnt = cmd->iov_cnt;
1228 uint32_t inline_dl;
1229 uint8_t *seg_desc, *tgt_desc, *par;
1230 uint16_t sdll, tdll;
1231 uint64_t num_lbas;
1232 int ret;
1233
1234 /*
1235 * The PARAMETER LIST LENGTH field specifies the length in bytes
1236 * of the parameter data that shall be contained in the Data-Out
1237 * Buffer.
1238 */
1239 par = calloc(1, data_length);
1240 if (!par) {
1241 tcmu_dev_err(dev, "calloc parameter list buffer error\n");
1242 return TCMU_STS_NO_RESOURCE;
1243 }
1244
1245 tcmu_memcpy_from_iovec(par, data_length, iovec, iov_cnt);
1246
1247 /*
1248 * From spc4r31, section 6.18.4 OPERATING PARAMETERS service action
1249 *
1250 * A supports no list identifier (SNLID) bit set to one indicates
1251 * the copy manager supports an EXTENDED COPY (see 6.3) command
1252 * parameter list in which the LIST ID USAGE field is set to 11b
1253 * and the LIST IDENTIFIER field is set to zero as described in
1254 * table 105 (see 6.3.1).
1255 *
1256 * From spc4r31, section 6.3.1 EXTENDED COPY command introduction
1257 *
1258 * LIST ID USAGE == 11b, then the LIST IDENTIFIER field should be
1259 * set to zero.
1260 */
1261 tcmu_dev_dbg(dev, "LIST ID USAGE: 0x%x, LIST IDENTIFIER: 0x%x\n",
1262 (par[1] & 0x18) >> 3, par[0]);
1263 if ((par[1] & 0x18) != 0x18 || par[0]) {
1264 tcmu_dev_err(dev, "LIST ID USAGE: 0x%x, LIST IDENTIFIER: 0x%x\n",
1265 (par[1] & 0x18) >> 3, par[0]);
1266 ret = TCMU_STS_INVALID_PARAM_LIST;
1267 goto err;
1268 }
1269
1270 /*
1271 * From spc4r31, section 6.3.6.1 Target descriptors introduction
1272 *
1273 * All target descriptors (see table 108) are 32 bytes or 64 bytes
1274 * in length
1275 * From spc4r36q, section 6.4.3.4
1276 * An EXTENDED COPY command may reference one or more CSCDs.
1277 */
1278 tdll = be16toh(*(uint16_t *)&par[2]);
1279 if (tdll < 32 || tdll % 32 != 0) {
1280 tcmu_dev_err(dev, "Illegal target descriptor length %u\n",
1281 tdll);
1282 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
1283 goto err;
1284 }
1285
1286 /*
1287 * From spc4r31, section 6.3.7.1 Segment descriptors introduction
1288 *
1289 * Segment descriptors (see table 120) begin with an eight byte header.
1290 */
1291 sdll = be32toh(*(uint32_t *)&par[8]);
1292 if (sdll < 8) {
1293 tcmu_dev_err(dev, "Illegal segment descriptor length %u\n",
1294 tdll);
1295 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
1296 goto err;
1297 }
1298
1299 /*
1300 * The maximum length of the target and segment descriptors permitted
1301 * within a parameter list is indicated by the MAXIMUM DESCRIPTOR LIST
1302 * LENGTH field in the copy manager's operating parameters.
1303 */
1304 if (tdll + sdll > RCR_OP_MAX_DESC_LIST_LEN) {
1305 tcmu_dev_err(dev, "descriptor list length %u exceeds maximum %u\n",
1306 tdll + sdll, RCR_OP_MAX_DESC_LIST_LEN);
1307 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
1308 goto err;
1309 }
1310
1311 /*
1312 * The INLINE DATA LENGTH field contains the number of bytes of inline
1313 * data, after the last segment descriptor.
1314 */
1315 inline_dl = be32toh(*(uint32_t *)&par[12]);
1316 if (inline_dl != 0) {
1317 tcmu_dev_err(dev, "non-zero xcopy inline_dl %u unsupported\n",
1318 inline_dl);
1319 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
1320 goto err;
1321 }
1322
1323 /* From spc4r31, section 6.3.1 EXTENDED COPY command introduction
1324 *
1325 * The EXTENDED COPY parameter list (see table 104) begins with a 16
1326 * byte header.
1327 *
1328 * The data length in CDB should be equal to tdll + sdll + inline_dl
1329 * + parameter list header length
1330 */
1331 if (data_length < (XCOPY_HDR_LEN + tdll + sdll + inline_dl)) {
1332 tcmu_dev_err(dev, "Illegal list length: length from CDB is %zu,"
1333 " but here the length is %u\n",
1334 data_length, tdll + sdll + inline_dl);
1335 ret = TCMU_STS_INVALID_PARAM_LIST_LEN;
1336 goto err;
1337 }
1338
1339 tcmu_dev_dbg(dev, "Processing XCOPY with tdll: %hu sdll: %u inline_dl: %u\n",
1340 tdll, sdll, inline_dl);
1341
1342 /*
1343 * Parse the segment descriptors and for now we only support block
1344 * -> block type.
1345 *
1346 * The max number of segment descriptors supported is 1 (see RCR_OP_MAX_SEGMENT_DESC_COUNT)
1347 */
1348 seg_desc = par + XCOPY_HDR_LEN + tdll;
1349 ret = xcopy_parse_segment_descs(seg_desc, xcopy, sdll);
1350 if (ret != TCMU_STS_OK)
1351 goto err;
1352
1353 /*
1354 * Parse the target descriptors
1355 *
1356 * The max number of target descriptors supported is 2 (see RCR_OP_MAX_TARGET_DESC_COUNT)
1357 */
1358 tgt_desc = par + XCOPY_HDR_LEN;
1359 ret = xcopy_parse_target_descs(dev, xcopy, tgt_desc, tdll);
1360 if (ret != TCMU_STS_OK)
1361 goto err;
1362
1363 /*
1364 * tcmu-runner can't determine whether the device(s) referred to in an
1365 * XCOPY request should be accessible to the initiator via transport
1366 * settings, ACLs, etc. XXX Consequently, we need to fail any
1367 * cross-device requests for safety reasons.
1368 */
1369 if (dev != xcopy->src_dev || dev != xcopy->dst_dev) {
1370 tcmu_dev_err(dev, "Cross-device XCOPY not supported\n");
1371 ret = TCMU_STS_CP_TGT_DEV_NOTCONN;
1372 goto err;
1373 }
1374
1375 if (tcmu_dev_get_block_size(xcopy->src_dev) !=
1376 tcmu_dev_get_block_size(xcopy->dst_dev)) {
1377 tcmu_dev_err(dev, "The block size of src dev %u != dst dev %u\n",
1378 tcmu_dev_get_block_size(xcopy->src_dev),
1379 tcmu_dev_get_block_size(xcopy->dst_dev));
1380 ret = TCMU_STS_INVALID_CP_TGT_DEV_TYPE;
1381 goto err;
1382 }
1383
1384 num_lbas = tcmu_dev_get_num_lbas(xcopy->src_dev);
1385 if (xcopy->src_lba + xcopy->lba_cnt > num_lbas) {
1386 tcmu_dev_err(xcopy->src_dev,
1387 "src target exceeds last lba %"PRIu64" (lba %"PRIu64", copy len %u\n",
1388 num_lbas, xcopy->src_lba, xcopy->lba_cnt);
1389 ret = TCMU_STS_RANGE;
1390 goto err;
1391 }
1392
1393 num_lbas = tcmu_dev_get_num_lbas(xcopy->dst_dev);
1394 if (xcopy->dst_lba + xcopy->lba_cnt > num_lbas) {
1395 tcmu_dev_err(xcopy->dst_dev,
1396 "dst target exceeds last lba %"PRIu64" (lba %"PRIu64", copy len %u)\n",
1397 num_lbas, xcopy->dst_lba, xcopy->lba_cnt);
1398 ret = TCMU_STS_RANGE;
1399 goto err;
1400 }
1401
1402 free(par);
1403 return TCMU_STS_OK;
1404
1405 err:
1406 free(par);
1407
1408 return ret;
1409 }
1410
1411 static int xcopy_read_work_fn(struct tcmu_device *src_dev, void *data);
1412 static void handle_xcopy_read_cbk(struct tcmu_device *src_dev,
1413 struct tcmur_cmd *tcmur_cmd, int ret);
1414
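/*
 * Note: the XCOPY data movement below is a read/write ping-pong on a
 * single bounce buffer: xcopy_read_work_fn() reads copy_lbas blocks
 * from the source LBA, handle_xcopy_read_cbk() schedules the write to
 * the destination, and handle_xcopy_write_cbk() advances
 * src_lba/dst_lba and re-arms the read until lba_cnt is exhausted.
 */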
1415 static void handle_xcopy_write_cbk(struct tcmu_device *dst_dev,
1416 struct tcmur_cmd *tcmur_cmd, int ret)
1417 {
1418 struct xcopy *xcopy = tcmur_cmd->cmd_state;
1419 struct tcmu_device *src_dev = xcopy->src_dev;
1420
1421 /* write failed - bail out */
1422 if (ret != TCMU_STS_OK) {
1423 tcmu_dev_err(src_dev, "Failed to write to dst device!\n");
1424 goto out;
1425 }
1426
1427 xcopy->lba_cnt -= xcopy->copy_lbas;
1428 if (!xcopy->lba_cnt)
1429 goto out;
1430
1431 xcopy->src_lba += xcopy->copy_lbas;
1432 xcopy->dst_lba += xcopy->copy_lbas;
1433 xcopy->copy_lbas = min(xcopy->lba_cnt, xcopy->copy_lbas);
1434 tcmur_cmd->requested = tcmu_lba_to_byte(src_dev, xcopy->copy_lbas);
1435
1436 tcmur_cmd->done = handle_xcopy_read_cbk;
1437 ret = aio_request_schedule(xcopy->src_dev, tcmur_cmd,
1438 xcopy_read_work_fn, tcmur_cmd_complete);
1439 if (ret != TCMU_STS_ASYNC_HANDLED)
1440 goto out;
1441
1442 return;
1443
1444 out:
1445 aio_command_finish(xcopy->origdev, tcmur_cmd->lib_cmd, ret);
1446 tcmur_cmd_state_free(tcmur_cmd);
1447 }
1448
1449 static int xcopy_write_work_fn(struct tcmu_device *dst_dev, void *data)
1450 {
1451 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dst_dev);
1452 struct tcmur_cmd *tcmur_cmd = data;
1453 struct xcopy *xcopy = tcmur_cmd->cmd_state;
1454
1455 tcmur_cmd_iovec_reset(tcmur_cmd, tcmur_cmd->requested);
1456
1457 return rhandler->write(dst_dev, tcmur_cmd, tcmur_cmd->iovec,
1458 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
1459 tcmu_lba_to_byte(dst_dev, xcopy->dst_lba));
1460 }
1461
1462 static void handle_xcopy_read_cbk(struct tcmu_device *src_dev,
1463 struct tcmur_cmd *tcmur_cmd,
1464 int ret)
1465 {
1466 struct xcopy *xcopy = tcmur_cmd->cmd_state;
1467
1468 /* read failed - bail out */
1469 if (ret != TCMU_STS_OK) {
1470 tcmu_dev_err(src_dev, "Failed to read from src device!\n");
1471 goto err;
1472 }
1473
1474 tcmur_cmd->done = handle_xcopy_write_cbk;
1475
1476 ret = aio_request_schedule(xcopy->dst_dev, tcmur_cmd,
1477 xcopy_write_work_fn, tcmur_cmd_complete);
1478 if (ret != TCMU_STS_ASYNC_HANDLED)
1479 goto err;
1480
1481 return;
1482
1483 err:
1484 aio_command_finish(xcopy->origdev, tcmur_cmd->lib_cmd, ret);
1485 tcmur_cmd_state_free(tcmur_cmd);
1486 }
1487
1488 static int xcopy_read_work_fn(struct tcmu_device *src_dev, void *data)
1489 {
1490 struct tcmur_handler *rhandler = tcmu_get_runner_handler(src_dev);
1491 struct tcmur_cmd *tcmur_cmd = data;
1492 struct xcopy *xcopy = tcmur_cmd->cmd_state;
1493
1494 tcmu_dev_dbg(src_dev,
1495 "Copying %u sectors from src (lba:%"PRIu64") to dst (lba:%"PRIu64")\n",
1496 xcopy->copy_lbas, xcopy->src_lba, xcopy->dst_lba);
1497
1498 tcmur_cmd_iovec_reset(tcmur_cmd, tcmur_cmd->requested);
1499
1500 return rhandler->read(src_dev, tcmur_cmd, tcmur_cmd->iovec,
1501 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
1502 tcmu_lba_to_byte(src_dev, xcopy->src_lba));
1503 }
1504
1505 /* async xcopy */
1506 static int handle_xcopy(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1507 {
1508 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1509 uint8_t *cdb = cmd->cdb;
1510 size_t data_length = tcmu_cdb_get_xfer_length(cdb);
1511 uint32_t max_sectors, src_max_sectors, dst_max_sectors;
1512 struct xcopy *xcopy, xcopy_parse;
1513 int ret;
1514
1515 /* spc4r36q sections 6.4 and 6.5
1516 * EXTENDED_COPY(LID4): service action 0x01;
1517 * EXTENDED_COPY(LID1): service action 0x00.
1518 */
1519 if ((cdb[1] & 0x1f) != 0x00) {
1520 tcmu_dev_err(dev, "EXTENDED_COPY(LID4) not supported\n");
1521 return TCMU_STS_INVALID_CMD;
1522 }
1523 /*
1524 * A parameter list length of zero specifies that the copy manager
1525 * shall not transfer any data or alter any internal state.
1526 */
1527 if (data_length == 0)
1528 return TCMU_STS_OK;
1529
1530 /*
1531 * The EXTENDED COPY parameter list begins with a 16 byte header
1532 * that contains the LIST IDENTIFIER field.
1533 */
1534 if (data_length < XCOPY_HDR_LEN) {
1535 tcmu_dev_err(dev, "Illegal parameter list: length %zu < hdr_len %u\n",
1536 data_length, XCOPY_HDR_LEN);
1537 return TCMU_STS_INVALID_PARAM_LIST_LEN;
1538 }
1539
1540 memset(&xcopy_parse, 0, sizeof(xcopy_parse));
1541 /* Parse and check the parameter list */
1542 ret = xcopy_parse_parameter_list(dev, cmd, &xcopy_parse);
1543 if (ret != 0)
1544 return ret;
1545
1546 /* Nothing to do with BLOCK DEVICE NUMBER OF BLOCKS set to zero */
1547 if (!xcopy_parse.lba_cnt)
1548 return TCMU_STS_OK;
1549
1550 src_max_sectors = tcmu_dev_get_opt_xcopy_rw_len(xcopy_parse.src_dev);
1551 dst_max_sectors = tcmu_dev_get_opt_xcopy_rw_len(xcopy_parse.dst_dev);
1552
1553 max_sectors = min(src_max_sectors, dst_max_sectors);
1554 xcopy_parse.copy_lbas = min(max_sectors, xcopy_parse.lba_cnt);
1555
1556 if (tcmur_cmd_state_init(tcmur_cmd, sizeof(*xcopy),
1557 tcmu_lba_to_byte(xcopy_parse.src_dev,
1558 xcopy_parse.copy_lbas))) {
1559 tcmu_dev_err(dev, "calloc xcopy data error\n");
1560 return TCMU_STS_NO_RESOURCE;
1561 }
1562 tcmur_cmd->done = handle_xcopy_read_cbk;
1563
1564 xcopy = tcmur_cmd->cmd_state;
1565 memcpy(xcopy, &xcopy_parse, sizeof(*xcopy));
1566 xcopy->origdev = dev;
1567
1568 ret = aio_request_schedule(xcopy->src_dev, tcmur_cmd,
1569 xcopy_read_work_fn, tcmur_cmd_complete);
1570 if (ret == TCMU_STS_ASYNC_HANDLED)
1571 return ret;
1572
1573 tcmur_cmd_state_free(tcmur_cmd);
1574 return ret;
1575 }
1576
1577 /* async compare_and_write */
1578
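/*
 * Note: when the handler has no native caw callout, COMPARE AND WRITE
 * is emulated under rdev->caw_lock: caw_work_fn() first reads the LBA
 * range into a bounce buffer, handle_caw_read_cbk() compares it with
 * the verify half of the Data-Out buffer and, on a match, seeks past
 * that half and re-schedules caw_work_fn() to write the second half;
 * any miscompare is reported with the failing offset in the sense data.
 */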
1579 static void handle_caw_write_cbk(struct tcmu_device *dev,
1580 struct tcmur_cmd *tcmur_cmd, int ret)
1581 {
1582 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
1583 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
1584
1585 pthread_mutex_unlock(&rdev->caw_lock);
1586 tcmur_cmd_state_free(tcmur_cmd);
1587 aio_command_finish(dev, cmd, ret);
1588 }
1589
1590 static int caw_work_fn(struct tcmu_device *dev, void *data)
1591 {
1592 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
1593 struct tcmur_cmd *tcmur_cmd = data;
1594 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
1595
1596 if (tcmur_cmd->done == handle_caw_write_cbk) {
1597 return rhandler->write(dev, tcmur_cmd, cmd->iovec, cmd->iov_cnt,
1598 tcmur_cmd->requested,
1599 tcmu_cdb_to_byte(dev, cmd->cdb));
1600
1601 } else {
1602 return rhandler->read(dev, tcmur_cmd, tcmur_cmd->iovec,
1603 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
1604 tcmu_cdb_to_byte(dev, cmd->cdb));
1605 }
1606 }
1607
1608 static void handle_caw_read_cbk(struct tcmu_device *dev,
1609 struct tcmur_cmd *tcmur_cmd, int ret)
1610 {
1611 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
1612 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
1613 uint32_t cmp_offset;
1614
1615 /* read failed - bail out */
1616 if (ret != TCMU_STS_OK)
1617 goto finish_err;
1618
1619 cmp_offset = tcmu_iovec_compare(tcmur_cmd->iov_base_copy, cmd->iovec,
1620 tcmur_cmd->requested);
1621 if (cmp_offset != -1) {
1622 /* verify failed - bail out */
1623 ret = TCMU_STS_MISCOMPARE;
1624 tcmu_sense_set_info(cmd->sense_buf, cmp_offset);
1625 goto finish_err;
1626 }
1627
1628 /* perform write */
1629 tcmu_cmd_seek(cmd, tcmur_cmd->requested);
1630 tcmur_cmd->done = handle_caw_write_cbk;
1631
1632 ret = aio_request_schedule(dev, tcmur_cmd, caw_work_fn,
1633 tcmur_cmd_complete);
1634 if (ret != TCMU_STS_ASYNC_HANDLED)
1635 goto finish_err;
1636
1637 return;
1638
1639 finish_err:
1640 pthread_mutex_unlock(&rdev->caw_lock);
1641 tcmur_cmd_state_free(tcmur_cmd);
1642 aio_command_finish(dev, cmd, ret);
1643 }
1644
1645 static int handle_caw_check(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1646 {
1647 int ret;
1648 uint64_t start_lba = tcmu_cdb_get_lba(cmd->cdb);
1649 uint8_t sectors = cmd->cdb[13];
1650
1651 /* From sbc4r12a section 5.3 COMPARE AND WRITE command
1652 * If the number of logical blocks exceeds the value in the
1653 * MAXIMUM COMPARE AND WRITE LENGTH field(see 6.64 block limits VPD page)
1654 * then the device server shall terminate the command with CHECK CONDITION
1655 * status with the sense key set to ILLEGAL REQUEST and the additional sense
1656 * code set to INVALID FIELD IN CDB.
1657 */
1658 if (sectors > MAX_CAW_LENGTH) {
1659 tcmu_dev_err(dev, "Received caw length %u greater than max caw length %u.\n",
1660 sectors, MAX_CAW_LENGTH);
1661 return TCMU_STS_INVALID_CDB;
1662 }
1663 /* double sectors since we have two buffers */
1664 ret = check_iovec_length(dev, cmd, sectors * 2);
1665 if (ret)
1666 return ret;
1667
1668 ret = check_lbas(dev, start_lba, sectors);
1669 if (ret)
1670 return ret;
1671
1672 return TCMU_STS_OK;
1673 }
1674
1675 static int tcmur_caw_fn(struct tcmu_device *dev, void *data)
1676 {
1677 struct tcmur_cmd *tcmur_cmd = data;
1678 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
1679 tcmur_caw_fn_t caw_fn = tcmur_cmd->cmd_state;
1680 uint64_t off = tcmu_cdb_to_byte(dev, cmd->cdb);
1681 size_t half = (tcmu_iovec_length(cmd->iovec, cmd->iov_cnt)) / 2;
1682
1683 return caw_fn(dev, tcmur_cmd, off, half, cmd->iovec, cmd->iov_cnt);
1684 }
1685
1686 static int handle_caw(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1687 {
1688 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
1689 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1690 size_t half = (tcmu_iovec_length(cmd->iovec, cmd->iov_cnt)) / 2;
1691 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
1692 uint8_t sectors = cmd->cdb[13];
1693 int ret;
1694
1695 if (tcmu_dev_in_recovery(dev))
1696 return TCMU_STS_BUSY;
1697
1698 ret = alua_check_state(dev, cmd, false);
1699 if (ret)
1700 return ret;
1701
1702 /* From sbc4r12a section 5.3 COMPARE AND WRITE command
1703 * A NUMBER OF LOGICAL BLOCKS field set to zero specifies that no
1704 * read operations shall be performed, no logical block data shall
1705 * be transferred from the Data-Out Buffer, no compare operations
1706 * shall be performed, and no write operations shall be performed.
1707 * This condition shall not be considered an error.
1708 */
1709 if (!sectors) {
1710 tcmu_dev_dbg(dev, "NUMBER OF LOGICAL BLOCKS is zero, just return ok.\n");
1711 return TCMU_STS_OK;
1712 }
1713
1714 ret = handle_caw_check(dev, cmd);
1715 if (ret)
1716 return ret;
1717
1718 if (rhandler->caw) {
1719 tcmur_cmd->cmd_state = rhandler->caw;
1720 tcmur_cmd->done = handle_generic_cbk;
1721 return aio_request_schedule(dev, tcmur_cmd, tcmur_caw_fn,
1722 tcmur_cmd_complete);
1723 }
1724
1725 if (tcmur_cmd_state_init(tcmur_cmd, 0, half))
1726 return TCMU_STS_NO_RESOURCE;
1727
1728 tcmur_cmd->done = handle_caw_read_cbk;
1729
1730 pthread_mutex_lock(&rdev->caw_lock);
1731
1732 ret = aio_request_schedule(dev, tcmur_cmd, caw_work_fn,
1733 tcmur_cmd_complete);
1734 if (ret == TCMU_STS_ASYNC_HANDLED)
1735 return TCMU_STS_ASYNC_HANDLED;
1736
1737 pthread_mutex_unlock(&rdev->caw_lock);
1738 tcmur_cmd_state_free(tcmur_cmd);
1739 return ret;
1740 }
1741
1742 /* async flush */
1743 static int flush_work_fn(struct tcmu_device *dev, void *data)
1744 {
1745 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
1746
1747 return rhandler->flush(dev, data);
1748 }
1749
1750 static int handle_flush(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1751 {
1752 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
1753 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1754
1755 if (!rhandler->flush)
1756 return TCMU_STS_INVALID_CMD;
1757
1758 tcmur_cmd->done = handle_generic_cbk;
1759 return aio_request_schedule(dev, tcmur_cmd, flush_work_fn,
1760 tcmur_cmd_complete);
1761 }
1762
1763 static int handle_recv_copy_result(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1764 {
1765 struct iovec *iovec = cmd->iovec;
1766 size_t iov_cnt = cmd->iov_cnt;
1767 uint8_t buf[128];
1768 uint16_t val16;
1769 uint32_t val32;
1770
1771 memset(buf, 0, sizeof(buf));
1772
1773 /*
1774 * From spc4r31, section 6.18.4 OPERATING PARAMETERS service
1775 * action
1776 */
1777
1778 /*
1779 * SNLID = 1: the copy manager will support an EXTENDED COPY
1780 * command parameter list in which the LIST ID USAGE field is
1781 * set to 11b
1782 */
1783 buf[4] = 0x01;
1784
1785 /*
1786 * MAXIMUM TARGET COUNT: the max number of target descriptors
1787 * that the copy manager allows in a single EXTENDED COPY
1788 * target descriptor list.
1789 */
1790 val16 = htobe16(RCR_OP_MAX_TARGET_DESC_COUNT);
1791 memcpy(&buf[8], &val16, 2);
1792
1793 /*
1794 * MAXIMUM SEGMENT COUNT: the max number of segment descriptors
1795 * that the copy manager allows in a single EXTENDED COPY
1796 * segment descriptor list.
1797 */
1798 val16 = htobe16(RCR_OP_MAX_SEGMENT_DESC_COUNT);
1799 memcpy(&buf[10], &val16, 2);
1800
1801 /*
1802 * MAXIMUM DESCRIPTOR LIST LENGTH: the max length, in bytes,
1803 * of the target descriptor list and segment descriptor list.
1804 */
1805 val32 = htobe32(RCR_OP_MAX_DESC_LIST_LEN);
1806 memcpy(&buf[12], &val32, 4);
1807
1808 /*
1809 * MAXIMUM SEGMENT LENGTH: the length, in bytes, of the largest
1810 * amount of data that the copy manager supports writing via a
1811 * single segment.
1812 */
1813 val32 = htobe32(RCR_OP_MAX_SEGMENT_LEN);
1814 memcpy(&buf[16], &val32, 4);
1815
1816 /*
1817 * TOTAL CONCURRENT COPIES: the total number of EXTENDED COPY
1818 * commands with the LIST ID USAGE field set to 00b or 10b that
1819 * are supported for concurrent processing by the copy manager.
1820 */
1821 val16 = htobe16(RCR_OP_TOTAL_CONCURR_COPIES);
1822 memcpy(&buf[34], &val16, 2);
1823
1824 /*
1825 * MAXIMUM CONCURRENT COPIES: the max number of EXTENDED COPY
1826 * commands with the LIST ID USAGE field set to 00b or 10b that
1827 * are supported for concurrent processing by the copy manager.
1828 */
1829 buf[36] = RCR_OP_MAX_CONCURR_COPIES;
1830
1831 /*
1832 * DATA SEGMENT GRANULARITY: the length of the smallest data
1833 * block that the copy manager permits in a non-inline segment
1834 * descriptor. In power of two.
1835 */
1836 buf[37] = RCR_OP_DATA_SEG_GRAN_LOG2;
1837
1838 /*
1839 * INLINE DATA GRANULARITY: the length of the smallest
1840 * block of inline data that the copy manager permits being
1841 * written by a segment descriptor containing the 04h descriptor
1842 * type code (see 6.3.7.7). In power of two.
1843 */
1844 buf[38] = RCR_OP_INLINE_DATA_GRAN_LOG2;
1845
1846 /*
1847 * HELD DATA GRANULARITY: the length of the smallest block of
1848 * held data that the copy manager shall transfer to the
1849 * application client in response to a RECEIVE COPY RESULTS
1850 * command with RECEIVE DATA service action (see 6.18.3).
1851 * In power of two.
1852 */
1853 buf[39] = RCR_OP_HELD_DATA_GRAN_LOG2;
1854
1855 /*
1856 * IMPLEMENTED DESCRIPTOR LIST LENGTH: the length, in bytes, of
1857 * the list of implemented descriptor type codes.
1858 */
1859 buf[43] = RCR_OP_IMPLE_DES_LIST_LENGTH;
1860
1861 /*
1862 * The list of implemented descriptor type codes: one byte for
1863 * each segment or target DESCRIPTOR TYPE CODE value (see 6.3.5)
1864 * supported by the copy manager.
1865 */
1866 buf[44] = XCOPY_SEG_DESC_TYPE_CODE_B2B; /* block --> block */
1867 buf[45] = XCOPY_TARGET_DESC_TYPE_CODE_ID; /* Identification descriptor */
1868
1869 /* AVAILABLE DATA (n-3)*/
1870 val32 = htobe32(42);
1871 memcpy(&buf[0], &val32, 4);
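/*
 * n is the offset of the last byte of parameter data set above (45),
 * so AVAILABLE DATA = 45 - 3 = 42.
 */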
1872
1873 tcmu_memcpy_into_iovec(iovec, iov_cnt, buf, sizeof(buf));
1874
1875 return TCMU_STS_OK;
1876 }
1877
1878 /* async write */
1879 static int handle_write(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1880 {
1881 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1882 int ret;
1883
1884 ret = check_lba_and_length(dev, cmd, tcmu_cdb_get_xfer_length(cmd->cdb));
1885 if (ret)
1886 return ret;
1887
1888 tcmur_cmd->done = handle_generic_cbk;
1889 return aio_request_schedule(dev, tcmur_cmd, write_work_fn,
1890 tcmur_cmd_complete);
1891 }
1892
1893 /* async read */
1894 static int handle_read(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
1895 {
1896 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1897 int ret;
1898
1899 ret = check_lba_and_length(dev, cmd, tcmu_cdb_get_xfer_length(cmd->cdb));
1900 if (ret)
1901 return ret;
1902
1903 tcmur_cmd->done = handle_generic_cbk;
1904 return aio_request_schedule(dev, tcmur_cmd, read_work_fn,
1905 tcmur_cmd_complete);
1906 }
1907
1908 /* FORMAT UNIT */
1909 struct format_unit_state {
1910 size_t length;
1911 off_t offset;
1912 uint32_t done_blocks;
1913 };
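/*
 * FORMAT UNIT is emulated by writing zero-filled buffers across the
 * whole device in chunks: each completion advances state->offset and
 * state->done_blocks and schedules the next write until done_blocks
 * reaches num_lbas, while rdev->format_progress tracks progress for
 * PROGRESS INDICATION reporting.
 */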
1914
1915 static int format_unit_work_fn(struct tcmu_device *dev, void *data)
1916 {
1917 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
1918 struct tcmur_cmd *tcmur_cmd = data;
1919 struct format_unit_state *state = tcmur_cmd->cmd_state;
1920
1921 return rhandler->write(dev, tcmur_cmd, tcmur_cmd->iovec,
1922 tcmur_cmd->iov_cnt, tcmur_cmd->requested,
1923 state->offset);
1924 }
1925
1926 static void handle_format_unit_cbk(struct tcmu_device *dev,
1927 struct tcmur_cmd *tcmur_cmd, int ret) {
1928 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
1929 struct tcmulib_cmd *cmd = tcmur_cmd->lib_cmd;
1930 struct format_unit_state *state = tcmur_cmd->cmd_state;
1931 uint32_t block_size = tcmu_dev_get_block_size(dev);
1932 int rc;
1933
1934 state->offset += tcmur_cmd->requested;
1935 state->done_blocks += tcmu_byte_to_lba(dev, tcmur_cmd->requested);
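/*
 * The sense-key-specific PROGRESS INDICATION is expressed as a
 * fraction of 65536 (0x10000); e.g. when half of the blocks have been
 * formatted this reports 0x8000.
 */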
1936 if (state->done_blocks < dev->num_lbas)
1937 rdev->format_progress = (0x10000 * state->done_blocks) /
1938 dev->num_lbas;
1939
1940 /* Check for the last command */
1941 if (state->done_blocks == dev->num_lbas) {
1942 tcmu_dev_dbg(dev,
1943 "last format cmd, done_blocks:%u num_lbas:%"PRIu64" block_size:%u\n",
1944 state->done_blocks, dev->num_lbas, block_size);
1945 goto free_state;
1946 }
1947
1948 if (state->done_blocks < dev->num_lbas) {
1949 size_t left = tcmu_lba_to_byte(dev,
1950 dev->num_lbas - state->done_blocks);
1951 if (left < tcmur_cmd->requested)
1952 tcmur_cmd->requested = left;
1953
1954 /* Handlers consume the iovec as they process it, so reset it before reuse */
1955 tcmur_cmd_iovec_reset(tcmur_cmd, tcmur_cmd->requested);
1956
1957 tcmu_dev_dbg(dev,
1958 "next format cmd, done_blocks:%u num_lbas:%"PRIu64" block_size:%u\n",
1959 state->done_blocks, dev->num_lbas, block_size);
1960
1961 rc = aio_request_schedule(dev, tcmur_cmd, format_unit_work_fn,
1962 tcmur_cmd_complete);
1963 if (rc != TCMU_STS_ASYNC_HANDLED) {
1964 tcmu_dev_err(dev, " async handle cmd failure\n");
1965 ret = TCMU_STS_WR_ERR;
1966 goto free_state;
1967 }
1968 }
1969
1970 return;
1971
1972 free_state:
1973 tcmur_cmd_state_free(tcmur_cmd);
1974 pthread_mutex_lock(&rdev->format_lock);
1975 rdev->flags &= ~TCMUR_DEV_FLAG_FORMATTING;
1976 pthread_mutex_unlock(&rdev->format_lock);
1977 aio_command_finish(dev, cmd, ret);
1978 }
1979
1980 static int handle_format_unit(struct tcmu_device *dev, struct tcmulib_cmd *cmd) {
1981 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
1982 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
1983 size_t max_xfer_length, length = 1024 * 1024;
1984 uint32_t block_size = tcmu_dev_get_block_size(dev);
1985 uint64_t num_lbas = tcmu_dev_get_num_lbas(dev);
1986 int ret;
1987
1988 pthread_mutex_lock(&rdev->format_lock);
1989 if (rdev->flags & TCMUR_DEV_FLAG_FORMATTING) {
1990 pthread_mutex_unlock(&rdev->format_lock);
1991 tcmu_sense_set_key_specific_info(cmd->sense_buf,
1992 rdev->format_progress);
1993 return TCMU_STS_FRMT_IN_PROGRESS;
1994 }
1995 rdev->format_progress = 0;
1996 rdev->flags |= TCMUR_DEV_FLAG_FORMATTING;
1997 pthread_mutex_unlock(&rdev->format_lock);
1998
1999 max_xfer_length = tcmu_dev_get_max_xfer_len(dev) * block_size;
2000 length = round_up(length, max_xfer_length);
2001 /* Make sure the first write does not extend past the end of a device smaller than the default chunk */
2002 if (tcmu_lba_to_byte(dev, num_lbas) < length)
2003 length = tcmu_lba_to_byte(dev, num_lbas);
2004
2005 if (tcmur_cmd_state_init(tcmur_cmd, sizeof(struct format_unit_state),
2006 length))
2007 goto clear_format;
2008 tcmur_cmd->done = handle_format_unit_cbk;
2009
2010 tcmu_dev_dbg(dev, "start emulate format, num_lbas:%"PRIu64" block_size:%u\n",
2011 num_lbas, block_size);
2012
2013 ret = aio_request_schedule(dev, tcmur_cmd, format_unit_work_fn,
2014 tcmur_cmd_complete);
2015 if (ret != TCMU_STS_ASYNC_HANDLED)
2016 goto free_state;
2017
2018 return TCMU_STS_ASYNC_HANDLED;
2019
2020 free_state:
2021 tcmur_cmd_state_free(tcmur_cmd);
2022 clear_format:
2023 pthread_mutex_lock(&rdev->format_lock);
2024 rdev->flags &= ~TCMUR_DEV_FLAG_FORMATTING;
2025 pthread_mutex_unlock(&rdev->format_lock);
2026 return TCMU_STS_NO_RESOURCE;
2027 }
2028
2029 /* ALUA */
2030 static int handle_stpg(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2031 {
2032 struct list_head group_list;
2033 int ret;
2034
2035 list_head_init(&group_list);
2036
2037 if (tcmu_get_alua_grps(dev, &group_list))
2038 return TCMU_STS_HW_ERR;
2039
2040 ret = tcmu_emulate_set_tgt_port_grps(dev, &group_list, cmd);
2041 tcmu_release_alua_grps(&group_list);
2042 return ret;
2043 }
2044
2045 static int handle_rtpg(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2046 {
2047 struct list_head group_list;
2048 int ret;
2049
2050 list_head_init(&group_list);
2051
2052 if (tcmu_get_alua_grps(dev, &group_list))
2053 return TCMU_STS_HW_ERR;
2054
2055 ret = tcmu_emulate_report_tgt_port_grps(dev, &group_list, cmd);
2056 tcmu_release_alua_grps(&group_list);
2057 return ret;
2058 }
2059
2060 /* command passthrough */
2061 static int passthrough_work_fn(struct tcmu_device *dev, void *data)
2062 {
2063 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
2064
2065 return rhandler->handle_cmd(dev, data);
2066 }
2067
2068 static int handle_passthrough(struct tcmu_device *dev,
2069 struct tcmur_cmd *tcmur_cmd)
2070 {
2071 tcmur_cmd->done = handle_generic_cbk;
2072 return aio_request_schedule(dev, tcmur_cmd, passthrough_work_fn,
2073 tcmur_cmd_complete);
2074 }
2075
2076 bool tcmur_handler_is_passthrough_only(struct tcmur_handler *rhandler)
2077 {
2078 if (rhandler->write || rhandler->read || rhandler->flush)
2079 return false;
2080
2081 return true;
2082 }
2083
2084 int tcmur_cmd_passthrough_handler(struct tcmu_device *dev,
2085 struct tcmulib_cmd *cmd)
2086 {
2087 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
2088 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
2089 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
2090 int ret;
2091
2092 if (!rhandler->handle_cmd)
2093 return TCMU_STS_NOT_HANDLED;
2094
2095 /*
2096 * Support handlers that implement their own threading/AIO
2097 * and only use runner's main event loop.
2098 */
2099 if (!rhandler->nr_threads)
2100 return rhandler->handle_cmd(dev, tcmur_cmd);
2101 /*
2102 * Since we call ->handle_cmd via aio_request_schedule(), ->handle_cmd
2103 * can finish in the caller's context (asynchronous handlers) or in the
2104 * work queue context (synchronous handlers). We therefore need to check
2105 * whether ->handle_cmd handled the passthrough command here as well as in
2106 * handle_passthrough_cbk().
2107 */
2108 track_aio_request_start(rdev);
2109 ret = handle_passthrough(dev, tcmur_cmd);
2110 if (ret != TCMU_STS_ASYNC_HANDLED)
2111 track_aio_request_finish(rdev, NULL);
2112
2113 return ret;
2114 }
2115
2116 static int tcmur_cmd_handler(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2117 {
2118 int ret = TCMU_STS_NOT_HANDLED;
2119 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
2120 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
2121 uint8_t *cdb = cmd->cdb;
2122 bool is_read = false;
2123
2124 track_aio_request_start(rdev);
2125
2126 if (tcmu_dev_in_recovery(dev)) {
2127 ret = TCMU_STS_BUSY;
2128 goto untrack;
2129 }
2130
2131 /* Don't perform alua implicit transition if command is not supported */
2132 switch(cdb[0]) {
2133 /* Reads can skip grabbing the lock */
2134 case READ_6:
2135 case READ_10:
2136 case READ_12:
2137 case READ_16:
2138 is_read = true;
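/* fall through */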
2139 case WRITE_6:
2140 case WRITE_10:
2141 case WRITE_12:
2142 case WRITE_16:
2143 case UNMAP:
2144 case SYNCHRONIZE_CACHE:
2145 case SYNCHRONIZE_CACHE_16:
2146 case EXTENDED_COPY:
2147 case COMPARE_AND_WRITE:
2148 case WRITE_VERIFY:
2149 case WRITE_VERIFY_16:
2150 case WRITE_SAME:
2151 case WRITE_SAME_16:
2152 case FORMAT_UNIT:
2153 ret = alua_check_state(dev, cmd, is_read);
2154 if (ret)
2155 goto untrack;
2156 break;
2157 default:
2158 break;
2159 }
2160
2161 switch(cdb[0]) {
2162 case READ_6:
2163 case READ_10:
2164 case READ_12:
2165 case READ_16:
2166 ret = handle_read(dev, cmd);
2167 break;
2168 case WRITE_6:
2169 case WRITE_10:
2170 case WRITE_12:
2171 case WRITE_16:
2172 ret = handle_write(dev, cmd);
2173 break;
2174 case UNMAP:
2175 ret = handle_unmap(dev, cmd);
2176 break;
2177 case SYNCHRONIZE_CACHE:
2178 case SYNCHRONIZE_CACHE_16:
2179 if (rhandler->flush)
2180 ret = handle_flush(dev, cmd);
2181 break;
2182 case EXTENDED_COPY:
2183 ret = handle_xcopy(dev, cmd);
2184 break;
2185 case COMPARE_AND_WRITE:
2186 ret = handle_caw(dev, cmd);
2187 break;
2188 case WRITE_VERIFY:
2189 case WRITE_VERIFY_16:
2190 ret = handle_write_verify(dev, cmd);
2191 break;
2192 case WRITE_SAME:
2193 case WRITE_SAME_16:
2194 ret = handle_writesame(dev, cmd);
2195 break;
2196 case FORMAT_UNIT:
2197 ret = handle_format_unit(dev, cmd);
2198 break;
2199 default:
2200 ret = TCMU_STS_NOT_HANDLED;
2201 }
2202
2203 untrack:
2204 if (ret != TCMU_STS_ASYNC_HANDLED)
2205 track_aio_request_finish(rdev, NULL);
2206 return ret;
2207 }
2208
2209 static int handle_inquiry(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2210 {
2211 struct list_head group_list;
2212 struct tgt_port *port;
2213 int ret;
2214
2215 list_head_init(&group_list);
2216
2217 if (tcmu_get_alua_grps(dev, &group_list))
2218 return TCMU_STS_HW_ERR;
2219
2220 port = tcmu_get_enabled_port(&group_list);
2221 if (!port) {
2222 tcmu_dev_dbg(dev, "no enabled ports found. Skipping ALUA support\n");
2223 } else {
2224 tcmu_update_dev_lock_state(dev);
2225 }
2226
2227 ret = tcmu_emulate_inquiry(dev, port, cmd->cdb, cmd->iovec,
2228 cmd->iov_cnt);
2229 tcmu_release_alua_grps(&group_list);
2230 return ret;
2231 }
2232
2233 static int handle_sync_cmd(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2234 {
2235 uint8_t *cdb = cmd->cdb;
2236 struct iovec *iovec = cmd->iovec;
2237 size_t iov_cnt = cmd->iov_cnt;
2238 uint32_t block_size = tcmu_dev_get_block_size(dev);
2239 uint64_t num_lbas = tcmu_dev_get_num_lbas(dev);
2240
2241 switch (cdb[0]) {
2242 case INQUIRY:
2243 return handle_inquiry(dev, cmd);
2244 case TEST_UNIT_READY:
2245 return tcmu_emulate_test_unit_ready(cdb, iovec, iov_cnt);
2246 case SERVICE_ACTION_IN_16:
2247 if (cdb[1] == READ_CAPACITY_16)
2248 return tcmu_emulate_read_capacity_16(num_lbas,
2249 block_size,
2250 cdb, iovec,
2251 iov_cnt);
2252 else
2253 return TCMU_STS_NOT_HANDLED;
2254 case READ_CAPACITY:
2255 if ((cdb[1] & 0x01) || (cdb[8] & 0x01))
2256 /* Reserved bits for MM logical units */
2257 return TCMU_STS_INVALID_CDB;
2258 else
2259 return tcmu_emulate_read_capacity_10(num_lbas,
2260 block_size,
2261 cdb, iovec,
2262 iov_cnt);
2263 case MODE_SENSE:
2264 case MODE_SENSE_10:
2265 return tcmu_emulate_mode_sense(dev, cdb, iovec, iov_cnt);
2266 case START_STOP:
2267 return tcmu_emulate_start_stop(dev, cdb);
2268 case MODE_SELECT:
2269 case MODE_SELECT_10:
2270 return tcmu_emulate_mode_select(dev, cdb, iovec, iov_cnt);
2271 case RECEIVE_COPY_RESULTS:
2272 if ((cdb[1] & 0x1f) == RCR_SA_OPERATING_PARAMETERS)
2273 return handle_recv_copy_result(dev, cmd);
2274 return TCMU_STS_NOT_HANDLED;
2275 case MAINTENANCE_OUT:
2276 if (cdb[1] == MO_SET_TARGET_PGS)
2277 return handle_stpg(dev, cmd);
2278 return TCMU_STS_NOT_HANDLED;
2279 case MAINTENANCE_IN:
2280 if ((cdb[1] & 0x1f) == MI_REPORT_TARGET_PGS)
2281 return handle_rtpg(dev, cmd);
2282 return TCMU_STS_NOT_HANDLED;
2283 default:
2284 return TCMU_STS_NOT_HANDLED;
2285 }
2286 }
2287
2288 static int handle_try_passthrough(struct tcmu_device *dev,
2289 struct tcmulib_cmd *cmd)
2290 {
2291 struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
2292 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
2293 struct tcmur_cmd *tcmur_cmd = cmd->hm_private;
2294 int ret;
2295
2296 if (!rhandler->handle_cmd)
2297 return TCMU_STS_NOT_HANDLED;
2298
2299 track_aio_request_start(rdev);
2300
2301 ret = rhandler->handle_cmd(dev, tcmur_cmd);
2302 if (ret != TCMU_STS_ASYNC_HANDLED)
2303 track_aio_request_finish(rdev, NULL);
2304
2305 return ret;
2306 }
2307
2308 int tcmur_dev_update_size(struct tcmu_device *dev, uint64_t new_size)
2309 {
2310 uint64_t old_size_lbas;
2311 int ret;
2312
2313 if (!new_size)
2314 return -EINVAL;
2315
2316 old_size_lbas = tcmu_dev_get_num_lbas(dev);
2317
2318 tcmu_dev_set_num_lbas(dev, tcmu_byte_to_lba(dev, new_size));
2319 ret = tcmu_cfgfs_dev_set_ctrl_u64(dev, "dev_size", new_size);
2320 if (ret)
2321 tcmu_dev_set_num_lbas(dev, old_size_lbas);
2322 else
2323 tcmur_set_pending_ua(dev, TCMUR_UA_DEV_SIZE_CHANGED);
2324 return ret;
2325 }
2326
2327 void tcmur_set_pending_ua(struct tcmu_device *dev, int ua)
2328 {
2329 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
2330
2331 pthread_mutex_lock(&rdev->state_lock);
2332 rdev->pending_uas |= (1 << ua);
2333 pthread_mutex_unlock(&rdev->state_lock);
2334 }
2335
2336 /*
2337 * TODO - coordinate with the kernel.
2338 */
2339 static int handle_pending_ua(struct tcmur_device *rdev, struct tcmulib_cmd *cmd)
2340 {
2341 uint8_t *cdb = cmd->cdb;
2342 int ret = TCMU_STS_NOT_HANDLED, ua;
2343
2344 switch (cdb[0]) {
2345 case INQUIRY:
2346 case REQUEST_SENSE:
2347 /* The kernel will handle REPORT_LUNS */
2348 return TCMU_STS_NOT_HANDLED;
2349 }
2350 pthread_mutex_lock(&rdev->state_lock);
2351
2352 if (!rdev->pending_uas) {
2353 ret = TCMU_STS_NOT_HANDLED;
2354 goto unlock;
2355 }
2356
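/*
 * ffs() returns the 1-based index of the lowest set bit, so the
 * lowest-numbered pending unit attention is reported first.
 */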
2357 ua = ffs(rdev->pending_uas) - 1;
2358 switch (ua) {
2359 case TCMUR_UA_DEV_SIZE_CHANGED:
2360 ret = TCMU_STS_CAPACITY_CHANGED;
2361 break;
2362 }
2363 rdev->pending_uas &= ~(1 << ua);
2364
2365 unlock:
2366 pthread_mutex_unlock(&rdev->state_lock);
2367 return ret;
2368 }
2369
2370 int tcmur_generic_handle_cmd(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
2371 {
2372 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
2373 int ret;
2374
2375 ret = handle_pending_ua(rdev, cmd);
2376 if (ret != TCMU_STS_NOT_HANDLED)
2377 return ret;
2378
2379 if (rdev->flags & TCMUR_DEV_FLAG_FORMATTING && cmd->cdb[0] != INQUIRY) {
2380 tcmu_sense_set_key_specific_info(cmd->sense_buf,
2381 rdev->format_progress);
2382 return TCMU_STS_FRMT_IN_PROGRESS;
2383 }
2384
2385 /*
2386 * The handler may want to handle some commands itself,
2387 * so try to pass the command through first.
2388 */
2389 ret = handle_try_passthrough(dev, cmd);
2390 if (ret != TCMU_STS_NOT_HANDLED)
2391 return ret;
2392
2393 /* Fall back to the runner's generic command handling */
2394 ret = handle_sync_cmd(dev, cmd);
2395 if (ret == TCMU_STS_NOT_HANDLED)
2396 ret = tcmur_cmd_handler(dev, cmd);
2397 return ret;
2398 }
0 32bit-size_t-cast.patch
1 fix-use-after-free.patch
0 /*
1 * Copyright 2016, China Mobile, Inc.
2 *
3 * This file is licensed to you under your choice of the GNU Lesser
4 * General Public License, version 2.1 or any later version (LGPLv2.1 or
5 * later), or the Apache License 2.0.
6 */
7
8 #define _GNU_SOURCE
9 #include <inttypes.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/utsname.h>
20 #include <fcntl.h>
21 #include <endian.h>
22 #include <errno.h>
#include <ctype.h>	/* isalnum(), used when parsing client addresses below */
23 #include <pthread.h>
24
25 #include <scsi/scsi.h>
26
27 #include "darray.h"
28 #include "tcmu-runner.h"
29 #include "tcmur_cmd_handler.h"
30 #include "libtcmu.h"
31 #include "tcmur_device.h"
32
33 #include <rbd/librbd.h>
34 #include <rados/librados.h>
35
36 /*
37 * rbd_lock_acquire exclusive lock support was added in librbd 0.1.11
38 */
39 #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 11)
40 #define RBD_LOCK_ACQUIRE_SUPPORT
41 #endif
42
43 /* rbd_aio_discard added in 0.1.2 */
44 #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
45 #define RBD_DISCARD_SUPPORT
46 #endif
47
48 /*
49 * rbd_aio_writesame support was added in librbd 1.12.0
50 */
51 #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(1, 12, 0) || LIBRBD_SUPPORTS_WRITESAME
52 #define RBD_WRITE_SAME_SUPPORT
53 #endif
54
55 /* defined in librbd.h if supported */
56 #ifdef LIBRBD_SUPPORTS_IOVEC
57 #if LIBRBD_SUPPORTS_IOVEC
58 #define RBD_IOVEC_SUPPORT
59 #endif
60 #endif
61
62 /* defined in librbd.h if supported */
63 #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE
64 #if LIBRBD_SUPPORTS_COMPARE_AND_WRITE
65 #define RBD_COMPARE_AND_WRITE_SUPPORT
66 #endif
67 #endif
68
69 #define TCMU_RBD_LOCKER_TAG_KEY "tcmu_rbd_locker_tag"
70 #define TCMU_RBD_LOCKER_TAG_FMT "tcmu_tag=%hu,rbd_client=%s"
71 #define TCMU_RBD_LOCKER_BUF_LEN 256
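/*
 * Per TCMU_RBD_LOCKER_TAG_FMT the stored tag looks like, with purely
 * illustrative values: "tcmu_tag=3,rbd_client=client.4189"
 */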
72
73 struct tcmu_rbd_state {
74 rados_t cluster;
75 rados_ioctx_t io_ctx;
76 rbd_image_t image;
77
78 char *image_name;
79 char *pool_name;
80 char *osd_op_timeout;
81 char *conf_path;
82 char *id;
83 char *addrs;
84 };
85
86 enum rbd_aio_type {
87 RBD_AIO_TYPE_WRITE = 0,
88 RBD_AIO_TYPE_READ,
89 RBD_AIO_TYPE_CAW
90 };
91
92 struct rbd_aio_cb {
93 struct tcmu_device *dev;
94 struct tcmur_cmd *tcmur_cmd;
95
96 enum rbd_aio_type type;
97 union {
98 struct {
99 int64_t length;
100 } read;
101 struct {
102 uint64_t offset;
103 uint64_t miscompare_offset;
104 } caw;
105 };
106 char *bounce_buffer;
107 struct iovec *iov;
108 size_t iov_cnt;
109 };
110
111 static pthread_mutex_t blacklist_caches_lock = PTHREAD_MUTEX_INITIALIZER;
112 static darray(char *) blacklist_caches;
113
114 #ifdef LIBRADOS_SUPPORTS_SERVICES
115
116 #ifdef RBD_LOCK_ACQUIRE_SUPPORT
117 static int tcmu_rbd_service_status_update(struct tcmu_device *dev,
118 bool has_lock)
119 {
120 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
121 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
122 char *status_buf = NULL;
123 int ret;
124
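/*
 * Build the flat buffer of NUL-separated key/value pairs that
 * rados_service_update_status() expects,
 * e.g. "lock_owner\0true\0lock_lost_cnt\0...".
 */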
125 ret = asprintf(&status_buf,
126 "%s%c%s%c%s%c%"PRIu64"%c%s%c%"PRIu64"%c%s%c%"PRIu64"%c",
127 "lock_owner", '\0', has_lock ? "true" : "false", '\0',
128 "lock_lost_cnt", '\0', rdev->lock_lost_cnt, '\0',
129 "conn_lost_cnt", '\0', rdev->conn_lost_cnt, '\0',
130 "cmd_timed_out_cnt", '\0', rdev->cmd_timed_out_cnt, '\0');
131 if (ret < 0) {
132 tcmu_dev_err(dev, "Could not allocate status buf. Service will not be updated.\n");
133 return ret;
134 }
135
136 ret = rados_service_update_status(state->cluster, status_buf);
137 if (ret < 0) {
138 tcmu_dev_err(dev, "Could not update service status. (Err %d)\n",
139 ret);
140 }
141
142 free(status_buf);
143 return ret;
144 }
145
146 #endif /* RBD_LOCK_ACQUIRE_SUPPORT */
147
148 static int tcmu_rbd_report_event(struct tcmu_device *dev)
149 {
150 struct tcmur_device *rdev = tcmu_dev_get_private(dev);
151
152 /*
153 * We ignore the specific event and report all the current counter
154 * values, because tools like gwcli/dashboard may not see every
155 * update, and we do not want one event to overwrite the info.
156 */
157 return tcmu_rbd_service_status_update(dev,
158 rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED ? true : false);
159 }
160
161 static int tcmu_rbd_service_register(struct tcmu_device *dev)
162 {
163 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
164 struct utsname u;
165 char *daemon_buf = NULL;
166 char *metadata_buf = NULL;
167 char *image_id_buf = NULL;
168 int ret;
169
170 ret = uname(&u);
171 if (ret < 0) {
172 ret = -errno;
173 tcmu_dev_err(dev, "Could not query uname. (Err %d)\n", ret);
174 return ret;
175 }
176
177 image_id_buf = malloc(RBD_MAX_BLOCK_NAME_SIZE);
178 if (image_id_buf == NULL) {
179 tcmu_dev_err(dev, "Could not allocate image id buf.\n");
180 return -ENOMEM;
181 }
182
183 ret = rbd_get_id(state->image, image_id_buf, RBD_MAX_BLOCK_NAME_SIZE);
184 if (ret < 0) {
185 tcmu_dev_err(dev, "Could not retrieve image id.\n");
186 goto free_image_id_buf;
187 }
188
189 ret = asprintf(&daemon_buf, "%s:%s/%s",
190 u.nodename, state->pool_name, state->image_name);
191 if (ret < 0) {
192 tcmu_dev_err(dev, "Could not allocate daemon buf.\n");
193 ret = -ENOMEM;
194 goto free_image_id_buf;
195 }
196
197 ret = asprintf(&metadata_buf, "%s%c%s%c%s%c%s%c%s%c%s%c%s%c%s%c%s%c%s%c",
198 "pool_name", '\0', state->pool_name, '\0',
199 "image_name", '\0', state->image_name, '\0',
200 "image_id", '\0', image_id_buf, '\0',
201 "daemon_type", '\0', "portal", '\0',
202 "daemon_prefix", '\0', u.nodename, '\0');
203 if (ret < 0) {
204 tcmu_dev_err(dev, "Could not allocate metadata buf.\n");
205 ret = -ENOMEM;
206 goto free_daemon_buf;
207 }
208
209 ret = rados_service_register(state->cluster, "tcmu-runner",
210 daemon_buf, metadata_buf);
211 if (ret < 0) {
212 tcmu_dev_err(dev, "Could not register service to cluster. (Err %d)\n",
213 ret);
214 goto free_meta_buf;
215 }
216
217 ret = tcmu_rbd_report_event(dev);
218 if (ret < 0)
219 tcmu_dev_err(dev, "Could not update status. (Err %d)\n", ret);
220
221 free_meta_buf:
222 free(metadata_buf);
223 free_daemon_buf:
224 free(daemon_buf);
225 free_image_id_buf:
226 free(image_id_buf);
227 return ret;
228 }
229
230 #else /* LIBRADOS_SUPPORTS_SERVICES */
231
232 static int tcmu_rbd_service_register(struct tcmu_device *dev)
233 {
234 /* Not fatal. Just log it in debug mode. */
235 tcmu_dev_dbg(dev, "Ceph service registration not supported.\n");
236 return 0;
237 }
238
239 #ifdef RBD_LOCK_ACQUIRE_SUPPORT
240 static void tcmu_rbd_service_status_update(struct tcmu_device *dev,
241 bool has_lock)
242 {
243 }
244 #endif /* RBD_LOCK_ACQUIRE_SUPPORT */
245
246 #endif /* LIBRADOS_SUPPORTS_SERVICES */
247
248 #if defined LIBRADOS_SUPPORTS_GETADDRS || defined RBD_LOCK_ACQUIRE_SUPPORT
249 static void tcmu_rbd_rm_stale_entry_from_blacklist(struct tcmu_device *dev, char *addrs)
250 {
251 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
252 const char *p, *q, *end;
253 char *cmd, *addr;
254 int ret;
255
256 /*
257 * Skip any extra characters before '[', if present
258 */
259 p = strchr(addrs, '[');
260 if (!p)
261 p = addrs;
262
263 /*
264 * The addrs will be a string like:
265 * "[192.168.195.172:0/2203456141,192.168.195.172:0/4908756432]"
266 * Or
267 * "192.168.195.172:0/2203456141"
268 */
269 while (1) {
270 if (p == NULL || *p == ']') {
271 return; /* we are done here */
272 } else if (*p == '[' || *p == ',') {
273 /* Skip "[" and white spaces */
274 while (*p != '\0' && !isalnum(*p)) p++;
275 if (*p == '\0') {
276 tcmu_dev_warn(dev, "Get an invalid address '%s'!\n", addrs);
277 return;
278 }
279
280 end = strchr(p, ',');
281 if (!end)
282 end = strchr(p, ']');
283
284 if (!end) {
285 tcmu_dev_warn(dev, "Get an invalid address '%s'!\n", addrs);
286 return;
287 }
288
289 q = end; /* The *end should be ',' or ']' */
290
291 while (*q != '\0' && !isalnum(*q)) q--;
292 if (*q == '\0') {
293 tcmu_dev_warn(dev, "Get an invalid address '%s'!\n", addrs);
294 return;
295 }
296
297 addr = strndup(p, q - p + 1);
298 p = end;
299 } else {
300 /* In case of "192.168.195.172:0/2203456141" */
301 addr = strdup(p);
302 p = NULL;
303 }
304
305 ret = asprintf(&cmd,
306 "{\"prefix\": \"osd blacklist\","
307 "\"blacklistop\": \"rm\","
308 "\"addr\": \"%s\"}",
309 addr);
310 free(addr);
311 if (ret < 0) {
312 tcmu_dev_warn(dev, "Could not allocate command. (Err %d)\n",
313 ret);
314 return;
315 }
316 ret = rados_mon_command(state->cluster, (const char**)&cmd, 1, NULL, 0,
317 NULL, NULL, NULL, NULL);
318 free(cmd);
319 if (ret < 0) {
320 tcmu_dev_err(dev, "Could not rm blacklist entry '%s'. (Err %d)\n",
321 addr, ret);
322 return;
323 }
324 }
325 }
326
327 static int tcmu_rbd_rm_stale_entries_from_blacklist(struct tcmu_device *dev)
328 {
329 char **entry, *tmp_entry;
330 int ret = 0;
331 int i;
332
333 pthread_mutex_lock(&blacklist_caches_lock);
334 if (darray_empty(blacklist_caches))
335 goto unlock;
336
337 /* Try to remove all the stale blacklist entries */
338 darray_foreach(entry, blacklist_caches) {
339 tcmu_dev_info(dev, "removing addrs: {%s}\n", *entry);
340 tcmu_rbd_rm_stale_entry_from_blacklist(dev, *entry);
341 }
342
343 unlock:
344 for (i = darray_size(blacklist_caches) - 1; i >= 0; i--) {
345 tmp_entry = darray_item(blacklist_caches, i);
346 darray_remove(blacklist_caches, i);
347 free(tmp_entry);
348 }
349
350 pthread_mutex_unlock(&blacklist_caches_lock);
351 return ret;
352 }
353 #endif // LIBRADOS_SUPPORTS_GETADDRS || RBD_LOCK_ACQUIRE_SUPPORT
354
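/*
 * Returns a pointer to the first unescaped '"' in the string, skipping
 * any backslash-escaped quotes, or NULL if none is found. For example
 * (illustrative input), for abc\"def"ghi it returns a pointer to the
 * quote before ghi.
 */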
355 static char *tcmu_rbd_find_quote(char *string)
356 {
357 /* ignore escaped quotes */
358 while (true) {
359 string = strpbrk(string, "\"\\");
360 if (!string) {
361 break;
362 }
363
364 if (*string == '"') {
365 break;
366 }
367
368 if (*++string == '\0') {
369 break;
370 }
371
372 /* skip past the escaped character */
373 ++string;
374 }
375 return string;
376 }
377
378 static bool tcmu_rbd_match_device_class(struct tcmu_device *dev,
379 const char *crush_rule,
380 const char *device_class)
381 {
382 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
383 char *mon_cmd_bufs[2] = {NULL, NULL};
384 char *mon_buf = NULL, *mon_status_buf = NULL;
385 size_t mon_buf_len = 0, mon_status_buf_len = 0;
386 int ret;
387 bool match = false;
388
389 /* request a list of crush rules associated to the device class */
390 ret = asprintf(&mon_cmd_bufs[0],
391 "{\"prefix\": \"osd crush rule ls-by-class\", "
392 "\"class\": \"%s\", \"format\": \"json\"}",
393 device_class);
394 if (ret < 0) {
395 tcmu_dev_warn(dev, "Could not allocate crush rule ls-by-class command.\n");
396 return false;
397 }
398
399 ret = rados_mon_command(state->cluster, (const char **)mon_cmd_bufs, 1,
400 "", 0, &mon_buf, &mon_buf_len,
401 &mon_status_buf, &mon_status_buf_len);
402 free(mon_cmd_bufs[0]);
403 if (ret == -ENOENT) {
404 tcmu_dev_dbg(dev, "%s not a registered device class.\n", device_class);
405 return false;
406 } else if (ret < 0 || !mon_buf) {
407 tcmu_dev_warn(dev, "Could not retrieve pool crush rule ls-by-class (Err %d)\n",
408 ret);
409 return false;
410 }
411 rados_buffer_free(mon_status_buf);
412
413 /* expected JSON output: ["<rule name>",["<rule name>"...]] */
414 mon_buf[mon_buf_len - 1] = '\0';
415 match = (strstr(mon_buf, crush_rule) != NULL);
416 rados_buffer_free(mon_buf);
417 return match;
418 }
419
420 static void tcmu_rbd_detect_device_class(struct tcmu_device *dev)
421 {
422 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
423 char *mon_cmd_bufs[2] = {NULL, NULL};
424 char *mon_buf = NULL, *mon_status_buf = NULL;
425 size_t mon_buf_len = 0, mon_status_buf_len = 0;
426 char *crush_rule = NULL, *crush_rule_end = NULL;
427 int ret;
428
429 /* request the crush rule name for the image's pool */
430 ret = asprintf(&mon_cmd_bufs[0],
431 "{\"prefix\": \"osd pool get\", "
432 "\"pool\": \"%s\", "
433 "\"var\": \"crush_rule\", "
434 "\"format\": \"json\"}", state->pool_name);
435 if (ret < 0) {
436 tcmu_dev_warn(dev, "Could not allocate crush rule command.\n");
437 return;
438 }
439
440 ret = rados_mon_command(state->cluster, (const char **)mon_cmd_bufs, 1,
441 "", 0, &mon_buf, &mon_buf_len,
442 &mon_status_buf, &mon_status_buf_len);
443 free(mon_cmd_bufs[0]);
444 if (ret < 0 || !mon_buf) {
445 tcmu_dev_warn(dev, "Could not retrieve pool crush rule (Err %d)\n",
446 ret);
447 return;
448 }
449 rados_buffer_free(mon_status_buf);
450
451 /* expected JSON output: "{..."crush_rule":"<rule name>"...}" */
452 mon_buf[mon_buf_len - 1] = '\0';
453 crush_rule = strstr(mon_buf, "\"crush_rule\":\"");
454 if (!crush_rule) {
455 tcmu_dev_warn(dev, "Could not locate crush rule key\n");
456 rados_buffer_free(mon_buf);
457 return;
458 }
459
460 /* skip past key to the start of the quoted rule name */
461 crush_rule += 13;
462 crush_rule_end = tcmu_rbd_find_quote(crush_rule + 1);
463 if (!crush_rule_end) {
464 tcmu_dev_warn(dev, "Could not extract crush rule\n");
465 rados_buffer_free(mon_buf);
466 return;
467 }
468
469 *(crush_rule_end + 1) = '\0';
470 crush_rule = strdup(crush_rule);
471 rados_buffer_free(mon_buf);
472 tcmu_dev_dbg(dev, "Pool %s using crush rule %s\n", state->pool_name,
473 crush_rule);
474
475 if (tcmu_rbd_match_device_class(dev, crush_rule, "ssd") ||
476 tcmu_rbd_match_device_class(dev, crush_rule, "nvme")) {
477 tcmu_dev_dbg(dev, "Pool %s associated to solid state device class.\n",
478 state->pool_name);
479 tcmu_dev_set_solid_state_media(dev, true);
480 }
481
482 free(crush_rule);
483 }
484
485 static void tcmu_rbd_image_close(struct tcmu_device *dev)
486 {
487 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
488
489 rbd_close(state->image);
490 rados_ioctx_destroy(state->io_ctx);
491 rados_shutdown(state->cluster);
492
493 state->cluster = NULL;
494 state->io_ctx = NULL;
495 state->image = NULL;
496 }
497
498 static int timer_check_and_set_def(struct tcmu_device *dev)
499 {
500 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
501 char buf[128];
502 int grace, interval, ret, len;
503 float timeout;
504
505 ret = rados_conf_get(state->cluster, "osd_heartbeat_grace",
506 buf, 128);
507 if (ret) {
508 tcmu_dev_err(dev, "Failed to get cluster's default osd_heartbeat_grace\n");
509 return ret;
510 }
511 grace = atoi(buf);
512
513 ret = rados_conf_get(state->cluster, "osd_heartbeat_interval",
514 buf, 128);
515 if (ret) {
516 tcmu_dev_err(dev, "Failed to get cluster's default osd_heartbeat_interval\n");
517 return ret;
518 }
519 interval = atoi(buf);
520
521 ret = rados_conf_get(state->cluster, "rados_osd_op_timeout",
522 buf, 128);
523 if (ret) {
524 tcmu_dev_err(dev, "Failed to get cluster's default rados_osd_op_timeout\n");
525 return ret;
526 }
527 timeout = atof(buf);
528
529 tcmu_dev_dbg(dev, "The cluster's default osd op timeout(%f), osd heartbeat grace(%d) interval(%d)\n",
530 timeout, grace, interval);
531
532 /* First: Try to use the user-configured osd op timeout value */
533 if (state->osd_op_timeout && atof(state->osd_op_timeout) > grace + interval)
534 goto set;
535
536 /* Second: Try to use the default osd op timeout value as read from the cluster */
537 if (timeout > grace + interval) {
538 tcmu_dev_dbg(dev, "The osd op timeout will remain the default value: %f\n", timeout);
539 return 0;
540 }
541
542 tcmu_dev_warn(dev, "osd op timeout (%s) must be larger than osd heartbeat grace (%d) + interval (%d)!\n",
543 state->osd_op_timeout, grace, interval);
544
545 /*
546 * Finally: Set rados_osd_op_timeout to grace + interval + 5
547 * to make sure rados_osd_op_timeout > grace + interval.
548 */
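/*
 * For example, with illustrative values osd_heartbeat_grace=20 and
 * osd_heartbeat_interval=6, the timeout becomes 31 seconds.
 */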
549 len = sprintf(buf, "%d", grace + interval + 5);
550 buf[len] = '\0';
551
552 if (state->osd_op_timeout)
553 free(state->osd_op_timeout);
554
555 state->osd_op_timeout = strdup(buf);
556 if (!state->osd_op_timeout) {
557 tcmu_dev_err(dev, "Failed to alloc memory for ->osd_op_timeout\n");
558 return -ENOMEM;
559 }
560
561 tcmu_dev_warn(dev, "Will set the osd op timeout to %s instead!\n",
562 state->osd_op_timeout);
563
564 set:
565 return rados_conf_set(state->cluster, "rados_osd_op_timeout",
566 state->osd_op_timeout);
567 }
568
569 static int tcmu_rbd_image_open(struct tcmu_device *dev)
570 {
571 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
572 int ret;
573
574 ret = rados_create(&state->cluster, state->id);
575 if (ret < 0) {
576 tcmu_dev_err(dev, "Could not create cluster. (Err %d)\n", ret);
577 return ret;
578 }
579
580 /* Try default location when conf_path=NULL, but ignore failure */
581 ret = rados_conf_read_file(state->cluster, state->conf_path);
582 if (state->conf_path && ret < 0) {
583 tcmu_dev_err(dev, "Could not read config %s (Err %d)",
584 state->conf_path, ret);
585 goto rados_shutdown;
586 }
587
588 rados_conf_set(state->cluster, "rbd_cache", "false");
589
590 ret = timer_check_and_set_def(dev);
591 if (ret)
592 tcmu_dev_warn(dev,
593 "Could not set rados osd op timeout to %s (Err %d. Failover may be delayed.)\n",
594 state->osd_op_timeout, ret);
595
596 ret = rados_connect(state->cluster);
597 if (ret < 0) {
598 tcmu_dev_err(dev, "Could not connect to cluster. (Err %d)\n",
599 ret);
600 goto rados_shutdown;
601 }
602
603 tcmu_rbd_detect_device_class(dev);
604 ret = rados_ioctx_create(state->cluster, state->pool_name,
605 &state->io_ctx);
606 if (ret < 0) {
607 tcmu_dev_err(dev, "Could not create ioctx for pool %s. (Err %d)\n",
608 state->pool_name, ret);
609 goto rados_shutdown;
610 }
611
612 ret = rbd_open(state->io_ctx, state->image_name, &state->image, NULL);
613 if (ret < 0) {
614 tcmu_dev_err(dev, "Could not open image %s. (Err %d)\n",
615 state->image_name, ret);
616 goto rados_destroy;
617 }
618
619 ret = tcmu_rbd_service_register(dev);
620 if (ret < 0)
621 goto rbd_close;
622
623 return 0;
624
625 rbd_close:
626 rbd_close(state->image);
627 state->image = NULL;
628 rados_destroy:
629 rados_ioctx_destroy(state->io_ctx);
630 state->io_ctx = NULL;
631 rados_shutdown:
632 rados_shutdown(state->cluster);
633 state->cluster = NULL;
634 return ret;
635 }
636
637 #ifdef RBD_LOCK_ACQUIRE_SUPPORT
638
639 /*
640 * Returns:
641 * 0 = client is not owner.
642 * 1 = client is owner.
643 * -ESHUTDOWN/-EBLACKLISTED(-108) = client is blacklisted.
644 * -ETIMEDOUT = rados osd op timeout has expired.
645 * -EIO = misc error.
646 */
647 static int tcmu_rbd_has_lock(struct tcmu_device *dev)
648 {
649 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
650 int ret, is_owner;
651
652 ret = rbd_is_exclusive_lock_owner(state->image, &is_owner);
653 if (ret < 0) {
654 if (ret == -ESHUTDOWN) {
655 tcmu_dev_dbg(dev, "Client is blacklisted. Could not check lock ownership.\n");
656 } else {
657 tcmu_dev_err(dev, "Could not check lock ownership. Error: %s.\n",
658 strerror(-ret));
659 }
660
661 if (ret == -ESHUTDOWN || ret == -ETIMEDOUT)
662 return ret;
663
664 /* let initiator figure things out */
665 return -EIO;
666 } else if (is_owner) {
667 tcmu_dev_dbg(dev, "Is owner\n");
668 return 1;
669 }
670 tcmu_dev_dbg(dev, "Not owner\n");
671
672 return 0;
673 }
674
675 static int tcmu_rbd_get_lock_state(struct tcmu_device *dev)
676 {
677 int ret;
678
679 ret = tcmu_rbd_has_lock(dev);
680 if (ret == 1)
681 return TCMUR_DEV_LOCK_WRITE_LOCKED;
682 else if (ret == 0 || ret == -ESHUTDOWN)
683 return TCMUR_DEV_LOCK_UNLOCKED;
684 else
685 return TCMUR_DEV_LOCK_UNKNOWN;
686 }
687
688 /**
689 * tcmu_rbd_lock_break - break rbd exclusive lock if needed
690 * @dev: device to break the lock for.
691 *
692 * Returns:
693 * 0 = lock has been broken.
694 * -ETIMEDOUT = could not complete operation in rados osd op timeout seconds.
695 * -Ezyx = misc failure.
696 */
697 static int tcmu_rbd_lock_break(struct tcmu_device *dev)
698 {
699 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
700 rbd_lock_mode_t lock_mode;
701 char *owners[1];
702 size_t num_owners = 1;
703 int ret;
704
705 ret = rbd_lock_get_owners(state->image, &lock_mode, owners,
706 &num_owners);
707 if (ret == -ENOENT || (!ret && !num_owners))
708 return 0;
709
710 if (ret < 0) {
711 tcmu_dev_err(dev, "Could not get lock owners to break lock %d\n",
712 ret);
713 return ret;
714 }
715
716 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
717 tcmu_dev_err(dev, "Invalid lock type (%d) found\n", lock_mode);
718 ret = -EIO;
719 goto free_owners;
720 }
721
722 tcmu_dev_dbg(dev, "Attempting to break lock from %s.\n", owners[0]);
723
724 ret = rbd_lock_break(state->image, lock_mode, owners[0]);
725 if (ret < 0)
726 tcmu_dev_err(dev, "Could not break lock from %s. (Err %d)\n",
727 owners[0], ret);
728 free_owners:
729 rbd_lock_get_owners_cleanup(owners, num_owners);
730 return ret;
731 }
732
733 static int tcmu_rbd_to_sts(int rc)
734 {
735 switch (rc) {
736 case 0:
737 return TCMU_STS_OK;
738 case -ESHUTDOWN:
739 return TCMU_STS_FENCED;
740 case -ENOENT:
741 return TCMU_STS_NO_LOCK_HOLDERS;
742 case -ETIMEDOUT:
743 return TCMU_STS_TIMEOUT;
744 case -ENOMEM:
745 return TCMU_STS_NO_RESOURCE;
746 default:
747 return TCMU_STS_HW_ERR;
748 }
749 }
750
751 static int tcmu_rbd_get_lock_tag(struct tcmu_device *dev, uint16_t *tag)
752 {
753 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
754 char *metadata_owner, *owners[1];
755 size_t num_owners = 1;
756 rbd_lock_mode_t lock_mode;
757 char buf[TCMU_RBD_LOCKER_BUF_LEN];
758 size_t buf_len = TCMU_RBD_LOCKER_BUF_LEN;
759 int ret;
760
761 memset(buf, 0, buf_len);
762
763 ret = rbd_metadata_get(state->image, TCMU_RBD_LOCKER_TAG_KEY,
764 buf, &buf_len);
765 tcmu_dev_dbg(dev, "get meta got %d %s\n", ret, buf);
766 if (ret)
767 goto done;
768
769 ret = rbd_lock_get_owners(state->image, &lock_mode, owners,
770 &num_owners);
771 tcmu_dev_dbg(dev, "get lockowner got %d\n", ret);
772 if (ret)
773 goto done;
774 if (!num_owners) {
775 /* there could be stale metadata left over from a crash */
776 ret = -ENOENT;
777 goto done;
778 }
779
780 metadata_owner = strstr(buf, "rbd_client=");
781 if (!metadata_owner) {
782 tcmu_dev_err(dev, "Invalid lock tag %s.\n", buf);
783 /* Force initiator to retry STPG */
784 ret = -ENOENT;
785 goto free_owners;
786 }
787
788 metadata_owner += 11;
789 if (strcmp(metadata_owner, owners[0])) {
790 tcmu_dev_dbg(dev, "owner mismatch %s %s\n", metadata_owner,
791 owners[0]);
792 /*
793 * A node could be in the middle of grabbing the lock, or it may have
794 * failed midway. Force tcmu to report all-standby so the
795 * initiator retries the STPG for the failure case.
796 */
797 ret = -ENOENT;
798 goto free_owners;
799 }
800
801 ret = sscanf(buf, "tcmu_tag=%hu,%*s", tag);
802 if (ret != 1) {
803 tcmu_dev_err(dev, "Invalid lock tag %s.\n", buf);
804 /* Force initiator to retry STPG */
805 ret = -ENOENT;
806 goto free_owners;
807 }
808 ret = 0;
809
810 free_owners:
811 if (num_owners)
812 rbd_lock_get_owners_cleanup(owners, num_owners);
813
814 done:
815 return tcmu_rbd_to_sts(ret);
816 }
817
818 static int tcmu_rbd_set_lock_tag(struct tcmu_device *dev, uint16_t tcmu_tag)
819 {
820 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
821 rbd_lock_mode_t lock_mode;
822 char *owners[1];
823 size_t num_owners = 1;
824 char *tcmu_rbd_tag;
825 int ret;
826
827 /*
828 * We cannot take the lock and set the tag atomically. In case
829 * we fail here and are in an inconsistent state, we attach the rbd
830 * client lock info along with the tcmu locker tag so remote nodes
831 * can check the rbd info against rbd_lock_get_owners to determine if
832 * the tcmu locker tag is current/valid.
833 */
834 ret = rbd_lock_get_owners(state->image, &lock_mode, owners,
835 &num_owners);
836 tcmu_dev_dbg(dev, "set tag get lockowner got %d %zd\n", ret, num_owners);
837 if (ret)
838 return ret;
839
840 if (!num_owners)
841 return -ENOENT;
842
843 ret = asprintf(&tcmu_rbd_tag, TCMU_RBD_LOCKER_TAG_FMT, tcmu_tag,
844 owners[0]);
845 if (ret < 0) {
846 ret = -ENOMEM;
847 goto free_owners;
848 }
849
850 ret = rbd_metadata_set(state->image, TCMU_RBD_LOCKER_TAG_KEY,
851 tcmu_rbd_tag);
852 free(tcmu_rbd_tag);
853 if (ret)
854 tcmu_dev_err(dev, "Could not store lock tag. Err %d.\n", ret);
855
856 free_owners:
857 if (num_owners)
858 rbd_lock_get_owners_cleanup(owners, num_owners);
859 return ret;
860 }
861
862 static int tcmu_rbd_unlock(struct tcmu_device *dev)
863 {
864 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
865 int ret;
866
867 ret = tcmu_rbd_has_lock(dev);
868 if (ret == 0)
869 return TCMU_STS_OK;
870 else if (ret < 0)
871 return tcmu_rbd_to_sts(ret);
872
873 ret = rbd_lock_release(state->image);
874 if (!ret)
875 return TCMU_STS_OK;
876
877 tcmu_dev_err(dev, "Could not release lock. Err %d.\n", ret);
878 return tcmu_rbd_to_sts(ret);
879 }
880
881 static int tcmu_rbd_lock(struct tcmu_device *dev, uint16_t tag)
882 {
883 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
884 #if !defined LIBRADOS_SUPPORTS_GETADDRS && defined RBD_LOCK_ACQUIRE_SUPPORT
885 rbd_lock_mode_t lock_mode;
886 char *owners1[1], *owners2[1];
887 size_t num_owners1 = 1, num_owners2 = 1;
888 #endif
889 int ret;
890
891 ret = tcmu_rbd_has_lock(dev);
892 if (ret == 1) {
893 ret = 0;
894 /*
895 * We might have failed after getting the lock, but
896 * before we set the metadata.
897 */
898 goto set_lock_tag;
899 } else if (ret)
900 goto done;
901
902 ret = tcmu_rbd_lock_break(dev);
903 if (ret)
904 goto done;
905
906 ret = rbd_lock_acquire(state->image, RBD_LOCK_MODE_EXCLUSIVE);
907 if (ret)
908 goto done;
909
910 #if !defined LIBRADOS_SUPPORTS_GETADDRS && defined RBD_LOCK_ACQUIRE_SUPPORT
911 ret = rbd_lock_get_owners(state->image, &lock_mode, owners1,
912 &num_owners1);
913 if ((!ret && !num_owners1) || ret < 0) {
914 tcmu_dev_warn(dev, "Could not get lock owners to store blacklist entry %d!\n",
915 ret);
916 } else {
917 int is_owner;
918
919 /* To check whether we are still the lock owner */
920 ret = rbd_is_exclusive_lock_owner(state->image, &is_owner);
921 if (ret) {
922 rbd_lock_get_owners_cleanup(owners1, num_owners1);
923 tcmu_dev_warn(dev, "Could not check lock owners to store blacklist entry %d!\n",
924 ret);
925 goto no_owner;
926 }
927
928 /* To get the lock owner again */
929 ret = rbd_lock_get_owners(state->image, &lock_mode, owners2,
930 &num_owners2);
931 if ((!ret && !num_owners2) || ret < 0) {
932 tcmu_dev_warn(dev, "Could not get lock owners to store blacklist entry %d!\n",
933 ret);
934 /* Only if we did not lose the lock during the above check do we store the blacklist entry */
935 } else if (!strcmp(owners1[0], owners2[0]) && is_owner) {
936 state->addrs = strdup(owners1[0]); /* ignore strdup failure */
937 }
938
939 rbd_lock_get_owners_cleanup(owners1, num_owners1);
940 rbd_lock_get_owners_cleanup(owners2, num_owners2);
941 }
942 no_owner:
943 #endif
944
945 set_lock_tag:
946 tcmu_dev_warn(dev, "Acquired exclusive lock.\n");
947 if (tag != TCMU_INVALID_LOCK_TAG)
948 ret = tcmu_rbd_set_lock_tag(dev, tag);
949
950 done:
951 tcmu_rbd_service_status_update(dev, ret == 0 ? true : false);
952 return tcmu_rbd_to_sts(ret);
953 }
954
955 static void tcmu_rbd_check_excl_lock_enabled(struct tcmu_device *dev)
956 {
957 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
958 uint64_t features = 0;
959 int ret;
960
961 ret = rbd_get_features(state->image, &features);
962 if (ret) {
963 tcmu_dev_warn(dev, "Could not get rbd features. HA may not be supported. Err %d.\n", ret);
964 return;
965 }
966
967 if (!(features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
968 tcmu_dev_warn(dev, "exclusive-lock not enabled for image. HA not supported.\n");
969 }
970 }
971
972 #else /* RBD_LOCK_ACQUIRE_SUPPORT */
973
974 static void tcmu_rbd_check_excl_lock_enabled(struct tcmu_device *dev)
975 {
976 tcmu_dev_warn(dev, "HA not supported.\n");
977 }
978
979 #endif /* RBD_LOCK_ACQUIRE_SUPPORT */
980
981 static void tcmu_rbd_state_free(struct tcmu_rbd_state *state)
982 {
983 if (state->conf_path)
984 free(state->conf_path);
985 if (state->osd_op_timeout)
986 free(state->osd_op_timeout);
987 if (state->image_name)
988 free(state->image_name);
989 if (state->pool_name)
990 free(state->pool_name);
991 if (state->id)
992 free(state->id);
993 if (state->addrs)
994 free(state->addrs);
995 free(state);
996 }
997
998 static int tcmu_rbd_check_image_size(struct tcmu_device *dev, uint64_t new_size)
999 {
1000 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1001 uint64_t rbd_size;
1002 int ret;
1003
1004 ret = rbd_get_size(state->image, &rbd_size);
1005 if (ret < 0) {
1006 tcmu_dev_err(dev, "Could not get rbd size from cluster. Err %d.\n",
1007 ret);
1008 return ret;
1009 }
1010
1011 if (new_size != rbd_size) {
1012 tcmu_dev_err(dev, "Mismatched sizes. RBD image size %" PRIu64 ". Requested new size %" PRIu64 ".\n",
1013 rbd_size, new_size);
1014 return -EINVAL;
1015 }
1016
1017 return 0;
1018 }
1019
1020 static int tcmu_rbd_open(struct tcmu_device *dev, bool reopen)
1021 {
1022 rbd_image_info_t image_info;
1023 char *pool, *name, *next_opt;
1024 char *config, *dev_cfg_dup;
1025 struct tcmu_rbd_state *state;
1026 uint32_t max_blocks, unmap_gran;
1027 int ret;
1028 char buf[128];
1029
1030 state = calloc(1, sizeof(*state));
1031 if (!state)
1032 return -ENOMEM;
1033 tcmur_dev_set_private(dev, state);
1034
1035 dev_cfg_dup = strdup(tcmu_dev_get_cfgstring(dev));
1036 config = dev_cfg_dup;
1037 if (!config) {
1038 ret = -ENOMEM;
1039 goto free_state;
1040 }
1041
1042 tcmu_dev_dbg(dev, "tcmu_rbd_open config %s block size %u num lbas %" PRIu64 ".\n",
1043 config, tcmu_dev_get_block_size(dev),
1044 tcmu_dev_get_num_lbas(dev));
1045
1046 config = strchr(config, '/');
1047 if (!config) {
1048 tcmu_dev_err(dev, "no configuration found in cfgstring\n");
1049 ret = -EINVAL;
1050 goto free_config;
1051 }
1052 config += 1; /* get past '/' */
1053
1054 pool = strtok(config, "/");
1055 if (!pool) {
1056 tcmu_dev_err(dev, "Could not get pool name\n");
1057 ret = -EINVAL;
1058 goto free_config;
1059 }
1060 state->pool_name = strdup(pool);
1061 if (!state->pool_name) {
1062 ret = -ENOMEM;
1063 tcmu_dev_err(dev, "Could not copy pool name\n");
1064 goto free_config;
1065 }
1066
1067 name = strtok(NULL, ";");
1068 if (!name) {
1069 tcmu_dev_err(dev, "Could not get image name\n");
1070 ret = -EINVAL;
1071 goto free_config;
1072 }
1073
1074 state->image_name = strdup(name);
1075 if (!state->image_name) {
1076 ret = -ENOMEM;
1077 tcmu_dev_err(dev, "Could not copy image name\n");
1078 goto free_config;
1079 }
1080
1081 /* The next options are optional */
1082 next_opt = strtok(NULL, ";");
1083 while (next_opt) {
1084 if (!strncmp(next_opt, "osd_op_timeout=", 15)) {
1085 state->osd_op_timeout = strdup(next_opt + 15);
1086 if (!state->osd_op_timeout ||
1087 !strlen(state->osd_op_timeout)) {
1088 ret = -ENOMEM;
1089 tcmu_dev_err(dev, "Could not copy osd op timeout.\n");
1090 goto free_config;
1091 }
1092 } else if (!strncmp(next_opt, "conf=", 5)) {
1093 state->conf_path = strdup(next_opt + 5);
1094 if (!state->conf_path || !strlen(state->conf_path)) {
1095 ret = -ENOMEM;
1096 tcmu_dev_err(dev, "Could not copy conf path.\n");
1097 goto free_config;
1098 }
1099 } else if (!strncmp(next_opt, "id=", 3)) {
1100 state->id = strdup(next_opt + 3);
1101 if (!state->id || !strlen(state->id)) {
1102 ret = -ENOMEM;
1103 tcmu_dev_err(dev, "Could not copy id.\n");
1104 goto free_config;
1105 }
1106 }
1107 next_opt = strtok(NULL, ";");
1108 }
1109
1110 ret = tcmu_rbd_image_open(dev);
1111 if (ret < 0) {
1112 goto free_config;
1113 }
1114
1115 tcmu_rbd_check_excl_lock_enabled(dev);
1116
1117 ret = tcmu_rbd_check_image_size(dev, tcmu_dev_get_block_size(dev) *
1118 tcmu_dev_get_num_lbas(dev));
1119 if (ret) {
1120 goto stop_image;
1121 }
1122
1123 ret = rbd_stat(state->image, &image_info, sizeof(image_info));
1124 if (ret < 0) {
1125 tcmu_dev_err(dev, "Could not stat image.\n");
1126 goto stop_image;
1127 }
1128
1129 /*
1130 * librbd/ceph can better split and align unmaps and internal RWs, so
1131 * just have the runner pass the entire cmd to us. To avoid overflowing
1132 * the OSD/ceph-side queues with discards/RWs, limit it to up to
1133 * 4 objects' worth of data.
1134 */
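/*
 * For example (illustrative numbers): with a 4 MiB object size and a
 * 512-byte block size, max_blocks = (4 MiB * 4) / 512 = 32768 blocks,
 * i.e. 16 MiB per command.
 */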
1135 max_blocks = (image_info.obj_size * 4) / tcmu_dev_get_block_size(dev);
1136 tcmu_dev_set_opt_xcopy_rw_len(dev, max_blocks);
1137 tcmu_dev_set_max_unmap_len(dev, max_blocks);
1138 ret = rados_conf_get(state->cluster, "rbd_discard_granularity_bytes", buf,
1139 sizeof(buf));
1140 if (!ret) {
1141 tcmu_dev_dbg(dev, "rbd_discard_granularity_bytes: %s\n", buf);
1142 unmap_gran = atoi(buf) / tcmu_dev_get_block_size(dev);
1143 } else {
1144 tcmu_dev_warn(dev,
1145 "Failed to get 'rbd_discard_granularity_bytes', %d\n",
1146 ret);
1147 unmap_gran = image_info.obj_size / tcmu_dev_get_block_size(dev);
1148 }
1149 tcmu_dev_dbg(dev, "unmap_gran: %d\n", unmap_gran);
1150 tcmu_dev_set_opt_unmap_gran(dev, unmap_gran, false);
1151 tcmu_dev_set_unmap_gran_align(dev, unmap_gran);
1152 tcmu_dev_set_write_cache_enabled(dev, 0);
1153
1154 #if defined LIBRADOS_SUPPORTS_GETADDRS || defined RBD_LOCK_ACQUIRE_SUPPORT
1155 tcmu_rbd_rm_stale_entries_from_blacklist(dev);
1156 #endif
1157
1158 #ifdef LIBRADOS_SUPPORTS_GETADDRS
1159 /* Get current entry address for the image */
1160 ret = rados_getaddrs(state->cluster, &state->addrs);
1161 if (ret < 0)
1162 goto stop_image;
1163 tcmu_dev_info(dev, "address: {%s}\n", state->addrs);
1164 #endif
1165
1166 free(dev_cfg_dup);
1167 return 0;
1168
1169 stop_image:
1170 tcmu_rbd_image_close(dev);
1171 free_config:
1172 free(dev_cfg_dup);
1173 free_state:
1174 tcmu_rbd_state_free(state);
1175 return ret;
1176 }
1177
1178 static void tcmu_rbd_close(struct tcmu_device *dev)
1179 {
1180 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1181
1182 tcmu_rbd_image_close(dev);
1183
1184 /*
1185 * We are closing the device, but it may already have been
1186 * blacklisted by other tcmu nodes. Save the entity addrs in
1187 * blacklist_caches and let any other newly opened device
1188 * remove the stale entry.
1189 */
1190 if (state->addrs) {
1191 pthread_mutex_lock(&blacklist_caches_lock);
1192 darray_append(blacklist_caches, state->addrs);
1193 pthread_mutex_unlock(&blacklist_caches_lock);
1194 state->addrs = NULL;
1195 }
1196
1197 tcmu_rbd_state_free(state);
1198 }
1199
1200 static int tcmu_rbd_handle_blacklisted_cmd(struct tcmu_device *dev)
1201 {
1202 tcmu_notify_lock_lost(dev);
1203 /*
1204 * This normally happens during failback: in-flight IO fails
1205 * because librbd immediately blacklists us when a higher
1206 * priority path acquires the lock.
1207 */
1208 return TCMU_STS_BUSY;
1209 }
1210
1211 /*
1212 * TODO: Check timers.
1213 * The rados osd op timeout must be longer than the timeouts to detect
1214 * unreachable OSDs (osd heartbeat grace + osd heartbeat interval) or
1215 * we will end up failing the transport connection when we just needed
1216 * to try a different OSD.
1217 */
1218 static int tcmu_rbd_handle_timedout_cmd(struct tcmu_device *dev)
1219 {
1220 tcmu_dev_err(dev, "Timing out cmd.\n");
1221 tcmu_notify_cmd_timed_out(dev);
1222
1223 /*
1224 * TODO: For AA, we will want to kill the ceph tcp connections
1225 * with LINGER on and set to 0, so there are no TCP retries,
1226 * and we need something on the OSD side to drop requests
1227 * that end up reaching it after the initiator's failover/recovery
1228 * timeout. For implicit and explicit FO, we will just disable
1229 * the iscsi port, and let the initiator switch paths which will
1230 * result in us getting blacklisted, so fail with a retryable
1231 * error.
1232 */
1233 return TCMU_STS_TIMEOUT;
1234 }
1235
1236 #ifdef RBD_IOVEC_SUPPORT
1237
1238 static rbd_image_t tcmu_dev_to_image(struct tcmu_device *dev)
1239 {
1240 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1241 return state->image;
1242 }
1243
1244 #define tcmu_rbd_aio_read(dev, aio_cb, completion, iov, iov_cnt, length, offset) \
1245 rbd_aio_readv(tcmu_dev_to_image(dev), iov, iov_cnt, offset, completion);
1246
1247 #define tcmu_rbd_aio_write(dev, aio_cb, completion, iov, iov_cnt, length, offset) \
1248 rbd_aio_writev(tcmu_dev_to_image(dev), iov, iov_cnt, offset, completion);
1249
1250 #else
1251
1252 static int tcmu_rbd_aio_read(struct tcmu_device *dev, struct rbd_aio_cb *aio_cb,
1253 rbd_completion_t completion, struct iovec *iov,
1254 size_t iov_cnt, size_t length, off_t offset)
1255 {
1256 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1257 int ret;
1258
1259 aio_cb->bounce_buffer = malloc(length);
1260 if (!aio_cb->bounce_buffer) {
1261 tcmu_dev_err(dev, "Could not allocate bounce buffer.\n");
1262 return -ENOMEM;
1263 }
1264
1265 ret = rbd_aio_read(state->image, offset, length, aio_cb->bounce_buffer,
1266 completion);
1267 if (ret < 0)
1268 free(aio_cb->bounce_buffer);
1269 return ret;
1270 }
1271
1272 static int tcmu_rbd_aio_write(struct tcmu_device *dev, struct rbd_aio_cb *aio_cb,
1273 rbd_completion_t completion, struct iovec *iov,
1274 size_t iov_cnt, size_t length, off_t offset)
1275 {
1276 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1277 int ret;
1278
1279 aio_cb->bounce_buffer = malloc(length);
1280 if (!aio_cb->bounce_buffer) {
1281 tcmu_dev_err(dev, "Failed to allocate bounce buffer.\n");
1282 return -ENOMEM;
1283 }
1284
1285 tcmu_memcpy_from_iovec(aio_cb->bounce_buffer, length, iov, iov_cnt);
1286
1287 ret = rbd_aio_write(state->image, offset, length, aio_cb->bounce_buffer,
1288 completion);
1289 if (ret < 0)
1290 free(aio_cb->bounce_buffer);
1291 return ret;
1292 }
1293
1294 #endif
1295
1296 /*
1297 * NOTE: RBD async APIs almost always return 0 (success), except
1298 * when allocation (via new) fails - which is not caught. So,
1299 * the only errnos we have to worry about for now are memory
1300 * allocation errors.
1301 */
1302 static void rbd_finish_aio_generic(rbd_completion_t completion,
1303 struct rbd_aio_cb *aio_cb)
1304 {
1305 struct tcmu_device *dev = aio_cb->dev;
1306 struct tcmur_cmd *tcmur_cmd = aio_cb->tcmur_cmd;
1307 struct iovec *iov = aio_cb->iov;
1308 size_t iov_cnt = aio_cb->iov_cnt;
1309 uint32_t cmp_offset;
1310 int64_t ret;
1311 int tcmu_r;
1312
1313 ret = rbd_aio_get_return_value(completion);
1314 rbd_aio_release(completion);
1315
1316 if (ret == -ETIMEDOUT) {
1317 tcmu_r = tcmu_rbd_handle_timedout_cmd(dev);
1318 } else if (ret == -ESHUTDOWN || ret == -EROFS) {
1319 tcmu_r = tcmu_rbd_handle_blacklisted_cmd(dev);
1320 } else if (ret == -EILSEQ && aio_cb->type == RBD_AIO_TYPE_CAW) {
1321 cmp_offset = aio_cb->caw.miscompare_offset - aio_cb->caw.offset;
1322 tcmu_dev_dbg(dev, "CAW miscompare at offset %u.\n", cmp_offset);
1323
1324 tcmu_r = TCMU_STS_MISCOMPARE;
1325 tcmu_sense_set_info(tcmur_cmd->lib_cmd->sense_buf, cmp_offset);
1326 } else if (ret == -EINVAL) {
1327 tcmu_dev_err(dev, "Invalid IO request.\n");
1328 tcmu_r = TCMU_STS_INVALID_CDB;
1329 } else if (ret < 0) {
1330 tcmu_dev_err(dev, "Got fatal IO error %"PRId64".\n", ret);
1331
1332 if (aio_cb->type == RBD_AIO_TYPE_READ)
1333 tcmu_r = TCMU_STS_RD_ERR;
1334 else
1335 tcmu_r = TCMU_STS_WR_ERR;
1336 } else {
1337 tcmu_r = TCMU_STS_OK;
1338 if (aio_cb->type == RBD_AIO_TYPE_READ &&
1339 aio_cb->bounce_buffer) {
1340 tcmu_memcpy_into_iovec(iov, iov_cnt,
1341 aio_cb->bounce_buffer,
1342 aio_cb->read.length);
1343 }
1344 }
1345
1346 tcmur_cmd_complete(dev, tcmur_cmd, tcmu_r);
1347
1348 if (aio_cb->bounce_buffer)
1349 free(aio_cb->bounce_buffer);
1350 free(aio_cb);
1351 }
1352
1353 static int tcmu_rbd_read(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
1354 struct iovec *iov, size_t iov_cnt, size_t length,
1355 off_t offset)
1356 {
1357 struct rbd_aio_cb *aio_cb;
1358 rbd_completion_t completion;
1359 ssize_t ret;
1360
1361 aio_cb = calloc(1, sizeof(*aio_cb));
1362 if (!aio_cb) {
1363 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1364 goto out;
1365 }
1366
1367 aio_cb->dev = dev;
1368 aio_cb->type = RBD_AIO_TYPE_READ;
1369 aio_cb->read.length = length;
1370 aio_cb->tcmur_cmd = tcmur_cmd;
1371 aio_cb->iov = iov;
1372 aio_cb->iov_cnt = iov_cnt;
1373
1374 ret = rbd_aio_create_completion
1375 (aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1376 if (ret < 0) {
1377 goto out_free_aio_cb;
1378 }
1379
1380 ret = tcmu_rbd_aio_read(dev, aio_cb, completion, iov, iov_cnt,
1381 length, offset);
1382 if (ret < 0)
1383 goto out_release_tracked_aio;
1384
1385 return TCMU_STS_OK;
1386
1387 out_release_tracked_aio:
1388 rbd_aio_release(completion);
1389 out_free_aio_cb:
1390 free(aio_cb);
1391 out:
1392 return TCMU_STS_NO_RESOURCE;
1393 }
1394
1395 static int tcmu_rbd_write(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
1396 struct iovec *iov, size_t iov_cnt, size_t length,
1397 off_t offset)
1398 {
1399 struct rbd_aio_cb *aio_cb;
1400 rbd_completion_t completion;
1401 ssize_t ret;
1402
1403 aio_cb = calloc(1, sizeof(*aio_cb));
1404 if (!aio_cb) {
1405 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1406 goto out;
1407 }
1408
1409 aio_cb->dev = dev;
1410 aio_cb->type = RBD_AIO_TYPE_WRITE;
1411 aio_cb->tcmur_cmd = tcmur_cmd;
1412
1413 ret = rbd_aio_create_completion
1414 (aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1415 if (ret < 0) {
1416 goto out_free_aio_cb;
1417 }
1418
1419 ret = tcmu_rbd_aio_write(dev, aio_cb, completion, iov, iov_cnt,
1420 length, offset);
1421 if (ret < 0) {
1422 goto out_release_tracked_aio;
1423 }
1424
1425 return TCMU_STS_OK;
1426
1427 out_release_tracked_aio:
1428 rbd_aio_release(completion);
1429 out_free_aio_cb:
1430 free(aio_cb);
1431 out:
1432 return TCMU_STS_NO_RESOURCE;
1433 }
1434
1435 #ifdef RBD_DISCARD_SUPPORT
1436 static int tcmu_rbd_unmap(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
1437 uint64_t off, uint64_t len)
1438 {
1439 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1440 struct rbd_aio_cb *aio_cb;
1441 rbd_completion_t completion;
1442 ssize_t ret;
1443
1444 aio_cb = calloc(1, sizeof(*aio_cb));
1445 if (!aio_cb) {
1446 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1447 goto out;
1448 }
1449
1450 aio_cb->dev = dev;
1451 aio_cb->tcmur_cmd = tcmur_cmd;
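/* No data payload; the WRITE type just makes failures map to TCMU_STS_WR_ERR. */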
1452 aio_cb->type = RBD_AIO_TYPE_WRITE;
1453 aio_cb->bounce_buffer = NULL;
1454
1455 ret = rbd_aio_create_completion
1456 (aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1457 if (ret < 0)
1458 goto out_free_aio_cb;
1459
1460 ret = rbd_aio_discard(state->image, off, len, completion);
1461 if (ret < 0)
1462 goto out_remove_tracked_aio;
1463
1464 return TCMU_STS_OK;
1465
1466 out_remove_tracked_aio:
1467 rbd_aio_release(completion);
1468 out_free_aio_cb:
1469 free(aio_cb);
1470 out:
1471 return TCMU_STS_NO_RESOURCE;
1472 }
1473 #endif /* RBD_DISCARD_SUPPORT */
1474
1475 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
1476
1477 static int tcmu_rbd_flush(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd)
1478 {
1479 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1480 struct rbd_aio_cb *aio_cb;
1481 rbd_completion_t completion;
1482 ssize_t ret;
1483
1484 aio_cb = calloc(1, sizeof(*aio_cb));
1485 if (!aio_cb) {
1486 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1487 goto out;
1488 }
1489
1490 aio_cb->dev = dev;
1491 aio_cb->tcmur_cmd = tcmur_cmd;
1492 aio_cb->type = RBD_AIO_TYPE_WRITE;
1493 aio_cb->bounce_buffer = NULL;
1494
1495 ret = rbd_aio_create_completion
1496 (aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1497 if (ret < 0) {
1498 goto out_free_aio_cb;
1499 }
1500
1501 ret = rbd_aio_flush(state->image, completion);
1502 if (ret < 0) {
1503 goto out_remove_tracked_aio;
1504 }
1505
1506 return TCMU_STS_OK;
1507
1508 out_remove_tracked_aio:
1509 rbd_aio_release(completion);
1510 out_free_aio_cb:
1511 free(aio_cb);
1512 out:
1513 return TCMU_STS_NO_RESOURCE;
1514 }
1515
1516 #endif
1517
1518 #ifdef RBD_WRITE_SAME_SUPPORT
1519 static int tcmu_rbd_aio_writesame(struct tcmu_device *dev,
1520 struct tcmur_cmd *tcmur_cmd,
1521 uint64_t off, uint64_t len,
1522 struct iovec *iov, size_t iov_cnt)
1523 {
1524 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1525 struct rbd_aio_cb *aio_cb;
1526 rbd_completion_t completion;
1527 size_t length = tcmu_iovec_length(iov, iov_cnt);
1528 ssize_t ret;
1529
1530 aio_cb = calloc(1, sizeof(*aio_cb));
1531 if (!aio_cb) {
1532 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1533 goto out;
1534 }
1535
1536 aio_cb->dev = dev;
1537 aio_cb->tcmur_cmd = tcmur_cmd;
1538 aio_cb->type = RBD_AIO_TYPE_WRITE;
1539
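/*
 * The iovec carries the WRITE_SAME pattern; rbd_aio_writesame()
 * replicates the length-byte bounce buffer across [off, off + len).
 */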
1540 aio_cb->bounce_buffer = malloc(length);
1541 if (!aio_cb->bounce_buffer) {
1542 tcmu_dev_err(dev, "Failed to allocate bounce buffer.\n");
1543 goto out_free_aio_cb;
1544 }
1545
1546 tcmu_memcpy_from_iovec(aio_cb->bounce_buffer, length, iov, iov_cnt);
1547
1548 ret = rbd_aio_create_completion
1549 (aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1550 if (ret < 0)
1551 goto out_free_bounce_buffer;
1552
1553 tcmu_dev_dbg(dev, "Start write same off:%"PRIu64", len:%"PRIu64"\n", off, len);
1554
1555 ret = rbd_aio_writesame(state->image, off, len, aio_cb->bounce_buffer,
1556 length, completion, 0);
1557 if (ret < 0)
1558 goto out_remove_tracked_aio;
1559
1560 return TCMU_STS_OK;
1561
1562 out_remove_tracked_aio:
1563 rbd_aio_release(completion);
1564 out_free_bounce_buffer:
1565 free(aio_cb->bounce_buffer);
1566 out_free_aio_cb:
1567 free(aio_cb);
1568 out:
1569 return TCMU_STS_NO_RESOURCE;
1570 }
1571 #endif /* RBD_WRITE_SAME_SUPPORT */
1572
1573 #ifdef RBD_COMPARE_AND_WRITE_SUPPORT
1574 static int tcmu_rbd_aio_caw(struct tcmu_device *dev, struct tcmur_cmd *tcmur_cmd,
1575 uint64_t off, uint64_t len, struct iovec *iov,
1576 size_t iov_cnt)
1577 {
1578 struct tcmu_rbd_state *state = tcmur_dev_get_private(dev);
1579 struct rbd_aio_cb *aio_cb;
1580 rbd_completion_t completion;
1581 uint64_t buffer_length = 2 * len;
1582 ssize_t ret;
1583
1584 aio_cb = calloc(1, sizeof(*aio_cb));
1585 if (!aio_cb) {
1586 tcmu_dev_err(dev, "Could not allocate aio_cb.\n");
1587 goto out;
1588 }
1589
1590 aio_cb->dev = dev;
1591 aio_cb->tcmur_cmd = tcmur_cmd;
1592 aio_cb->type = RBD_AIO_TYPE_CAW;
1593 aio_cb->caw.offset = off;
1594
1595 aio_cb->bounce_buffer = malloc(buffer_length);
1596 if (!aio_cb->bounce_buffer) {
1597 tcmu_dev_err(dev, "Failed to allocate bounce buffer.\n");
1598 goto out_free_aio_cb;
1599 }
1600
1601 /* The compare data (first len bytes) and the write data (next len bytes) arrive combined */
1602 tcmu_memcpy_from_iovec(aio_cb->bounce_buffer, buffer_length, iov,
1603 iov_cnt);
1604
1605 ret = rbd_aio_create_completion(
1606 aio_cb, (rbd_callback_t) rbd_finish_aio_generic, &completion);
1607 if (ret < 0) {
1608 goto out_free_bounce_buffer;
1609 }
1610
1611 tcmu_dev_dbg(dev, "Start CAW off: %"PRIu64", len: %"PRIu64"\n",
1612 off, len);
1613 ret = rbd_aio_compare_and_write(state->image, off, len,
1614 aio_cb->bounce_buffer,
1615 aio_cb->bounce_buffer + len, completion,
1616 &aio_cb->caw.miscompare_offset, 0);
1617 if (ret < 0)
1618 goto out_remove_tracked_aio;
1619
1620 return TCMU_STS_OK;
1621
1622 out_remove_tracked_aio:
1623 rbd_aio_release(completion);
1624 out_free_bounce_buffer:
1625 free(aio_cb->bounce_buffer);
1626 out_free_aio_cb:
1627 free(aio_cb);
1628 out:
1629 return TCMU_STS_NO_RESOURCE;
1630 }
1631 #endif /* RBD_COMPARE_AND_WRITE_SUPPORT */
1632
1633 static int tcmu_rbd_reconfig(struct tcmu_device *dev,
1634 struct tcmulib_cfg_info *cfg)
1635 {
1636 switch (cfg->type) {
1637 case TCMULIB_CFG_DEV_SIZE:
1638 /*
1639  * Apps will already have resized the image on the Ceph side, so
1640  * there is no need to double check here; doing so would also mean
1641  * having to handle unblacklisting the client from this context.
1642  */
1643 return 0;
1644 case TCMULIB_CFG_DEV_CFGSTR:
1645 case TCMULIB_CFG_WRITE_CACHE:
1646 default:
1647 return -EOPNOTSUPP;
1648 }
1649 }
1650
1651 static int tcmu_rbd_init(void)
1652 {
1653 darray_init(blacklist_caches);
1654 return 0;
1655 }
1656
1657 static void tcmu_rbd_destroy(void)
1658 {
1659 char **entry;
1660
1661 tcmu_info("destroying the rbd handler\n");
1662 pthread_mutex_lock(&blacklist_caches_lock);
1663 if (darray_empty(blacklist_caches))
1664 goto unlock;
1665
1666 /* Try to remove all the stale blacklist entries */
1667 darray_foreach(entry, blacklist_caches)
1668 free(*entry);
1669
1670 darray_free(blacklist_caches);
1671
1672 unlock:
1673 pthread_mutex_unlock(&blacklist_caches_lock);
1674 }
1675
1676 /*
1677 * For backstore creation
1678 *
1679 * Specify poolname/devicename[;option1;option2;...], e.g.,
1680 *
1681 * $ targetcli /backstores/user:rbd create test 2G rbd/test[;osd_op_timeout=30]
1682 *
1683 * poolname must be the name of an existing rados pool.
1684 *
1685 * devicename is the name of the rbd image.
1686 */
1687 static const char tcmu_rbd_cfg_desc[] =
1688 "RBD config string is of the form:\n"
1689 "poolname/devicename[;option1;option2;...]\n"
1690 "where:\n"
1691 "poolname: Existing RADOS pool\n"
1692 "devicename: Name of the RBD image\n"
1693 "optionN: Options such as: \"osd_op_timeout=30\" (in seconds)\n"
1694 " \"conf=/etc/ceph/cluster.conf\"\n"
1695 " \"id=user\"\n";
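/*
 * For example (pool and image names are illustrative), a full config
 * string with options might look like:
 *
 *   rbd/test;osd_op_timeout=30;conf=/etc/ceph/cluster.conf;id=user
 */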
1696
1697 struct tcmur_handler tcmu_rbd_handler = {
1698 .name = "Ceph RBD handler",
1699 .subtype = "rbd",
1700 .cfg_desc = tcmu_rbd_cfg_desc,
1701 .open = tcmu_rbd_open,
1702 .close = tcmu_rbd_close,
1703 .read = tcmu_rbd_read,
1704 .write = tcmu_rbd_write,
1705 .reconfig = tcmu_rbd_reconfig,
1706 #ifdef LIBRADOS_SUPPORTS_SERVICES
1707 .report_event = tcmu_rbd_report_event,
1708 #endif
1709 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
1710 .flush = tcmu_rbd_flush,
1711 #endif
1712 #ifdef RBD_DISCARD_SUPPORT
1713 .unmap = tcmu_rbd_unmap,
1714 #endif
1715 #ifdef RBD_WRITE_SAME_SUPPORT
1716 .writesame = tcmu_rbd_aio_writesame,
1717 #endif
1718 #ifdef RBD_COMPARE_AND_WRITE_SUPPORT
1719 .caw = tcmu_rbd_aio_caw,
1720 #endif
1721 #ifdef RBD_LOCK_ACQUIRE_SUPPORT
1722 .lock = tcmu_rbd_lock,
1723 .unlock = tcmu_rbd_unlock,
1724 .get_lock_tag = tcmu_rbd_get_lock_tag,
1725 .get_lock_state = tcmu_rbd_get_lock_state,
1726 #endif
1727 .init = tcmu_rbd_init,
1728 .destroy = tcmu_rbd_destroy,
1729 };
1730
1731 int handler_init(void)
1732 {
1733 return tcmur_register_handler(&tcmu_rbd_handler);
1734 }
0 tcmu (1.5.4-4) unstable; urgency=medium
1
2 * QA upload.
3 * Add fix-use-after-free.patch. Closes: #1013050
4 * d/p/series: Remove mention of non-existing patches.
5
6 -- Håvard F. Aasen <havard.f.aasen@pfft.no> Sat, 23 Jul 2022 23:53:15 +0200
7
0 8 tcmu (1.5.4-3) unstable; urgency=medium
1 9
2 10 * QA upload.
0 From: Xiubo Li <xiubli@redhat.com>
1 Date: Mon, 30 Aug 2021 13:03:41 +0800
2 Subject: [PATCH] rbd: fix use-after-free of addr
3
4 Signed-off-by: Xiubo Li <xiubli@redhat.com>
5
6 Origin: upstream, https://github.com/open-iscsi/tcmu-runner/commit/ec8d252494b0ddfcd684506ff88946e852fac58e
7 ---
8 rbd.c | 4 +++-
9 1 file changed, 3 insertions(+), 1 deletion(-)
10
11 diff --git a/rbd.c b/rbd.c
12 index 82afd9b..55950da 100644
13 --- a/rbd.c
14 +++ b/rbd.c
15 @@ -308,10 +308,10 @@ static void tcmu_rbd_rm_stale_entry_from_blacklist(struct tcmu_device *dev, char
16 "\"blacklistop\": \"rm\","
17 "\"addr\": \"%s\"}",
18 addr);
19 - free(addr);
20 if (ret < 0) {
21 tcmu_dev_warn(dev, "Could not allocate command. (Err %d)\n",
22 ret);
23 + free(addr);
24 return;
25 }
26 ret = rados_mon_command(state->cluster, (const char**)&cmd, 1, NULL, 0,
27 @@ -320,8 +320,10 @@ static void tcmu_rbd_rm_stale_entry_from_blacklist(struct tcmu_device *dev, char
28 if (ret < 0) {
29 tcmu_dev_err(dev, "Could not rm blacklist entry '%s'. (Err %d)\n",
30 addr, ret);
31 + free(addr);
32 return;
33 }
34 + free(addr);
35 }
36 }
37
0 #Disable-Werror.patch
1 0 32bit-size_t-cast.patch
2 #CVE-2021-3139.patch
1 fix-use-after-free.patch
307 307 "\"blacklistop\": \"rm\","
308 308 "\"addr\": \"%s\"}",
309 309 addr);
310 free(addr);
311 310 if (ret < 0) {
312 311 tcmu_dev_warn(dev, "Could not allocate command. (Err %d)\n",
313 312 ret);
313 free(addr);
314 314 return;
315 315 }
316 316 ret = rados_mon_command(state->cluster, (const char**)&cmd, 1, NULL, 0,
319 319 if (ret < 0) {
320 320 tcmu_dev_err(dev, "Could not rm blacklist entry '%s'. (Err %d)\n",
321 321 addr, ret);
322 free(addr);
322 323 return;
323 324 }
325 free(addr);
324 326 }
325 327 }
326 328
767 767
768 768 max_xfer_length = tcmu_dev_get_max_xfer_len(dev) * block_size;
769 769 length = round_up(length, max_xfer_length);
770 length = min(length, tcmu_lba_to_byte(dev, lba_cnt));
770 length = min(length, (size_t)tcmu_lba_to_byte(dev, lba_cnt));
771 771
772 772 if (tcmur_cmd_state_init(tcmur_cmd, sizeof(*write_same), length)) {
773 773 tcmu_dev_err(dev, "Failed to calloc write_same data!\n");