In some instances a drive fails and, while still seemingly online, reports that it is not ready. Callers use this check to make sure the device is usable. A non-zero result here means there is a problem with the given device.
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2018 Sam Zaydel / RackTop Systems.
*
* sd-ready-valid-csv.d
*
* Description:
* Script tracks return code from sd_ready_and_valid function, which tells
* the caller (sdopen or sdioctl) whether a given drive is usable.
* When a device fails this test a non-zero value is returned and depending
* on state of drive we should see messages in the kernel log along the lines
* of `drive offline`.
*/
/*
 * Helper macros. Each takes an sd soft-state pointer (un).
 *
 * SD_TO_DEVINFO(un) follows un->un_sd->sd_dev and casts the result to a
 * struct dev_info pointer.
 */
#define SD_TO_DEVINFO(un) ((struct dev_info *)((un)->un_sd->sd_dev))
/*
 * DEV_NAME(un) indexes the kernel devnamesp array by the devinfo node's
 * devi_major and converts the dn_name entry to a D string. It is not
 * referenced by the clauses of this script. The trailing comment on the
 * macro body carries a second backtick, presumably to keep the backticks
 * balanced for the preprocessor or for editors -- leave it in place.
 */
#define DEV_NAME(un) \
stringof(`devnamesp[SD_TO_DEVINFO(un)->devi_major].dn_name) /* ` */
/* DEV_INST(un) yields the devi_instance field of the devinfo node. */
#define DEV_INST(un) (SD_TO_DEVINFO(un)->devi_instance)
/*
 * On entry to sd_ready_and_valid, stash the ssc_un pointer taken from the
 * first argument in a thread-local variable, so that the matching return
 * clause can tell which device the call was made for.
 */
::sd_ready_and_valid:entry
{
	self->un = args[0]->ssc_un;
}
/*
 * On return from sd_ready_and_valid, count the outcome in an aggregation
 * keyed by device id string, sd instance number and args[1] (the return
 * code this script tracks). Only fires when the entry clause recorded a
 * soft-state pointer for this thread.
 */
::sd_ready_and_valid:return
/self->un/
{
	this->dip = SD_TO_DEVINFO(self->un);

	/*
	 * A devinfo node without a registered devid has a NULL
	 * devi_devid_str. Converting that with stringof makes DTrace raise
	 * an invalid-address error and abort the clause, which loses the
	 * sample and skips the thread-local cleanup below. Substitute a
	 * placeholder key instead so such devices are still counted.
	 */
	@[this->dip->devi_devid_str != NULL ?
	    stringof(this->dip->devi_devid_str) : "<no-devid>",
	    this->dip->devi_instance, args[1]] = count();

	/* Release the thread-local now that the call has been accounted for. */
	self->un = NULL;
}
/*
 * When tracing ends, print the aggregation as CSV: a header row, then one
 * row per (device, instance, retcode) key with its count.
 */
END
{
	printf("device,instance,retcode,count\n");
	printa("%s,sd%d,%d,%@d\n", @);
}
/*
 * Helper macros.
 *
 * SD_TO_DEVINFO(un) follows un->un_sd->sd_dev from an sd soft-state pointer
 * and casts the result to a struct dev_info pointer.
 */
#define SD_TO_DEVINFO(un) ((struct dev_info *)((un)->un_sd->sd_dev))
/*
 * DEV_NAME(un) indexes the kernel devnamesp array by the devinfo node's
 * devi_major and converts the dn_name entry to a D string. The trailing
 * comment on the macro body carries a second backtick, presumably to keep
 * the backticks balanced for the preprocessor or for editors -- leave it in
 * place.
 */
#define DEV_NAME(un) \
stringof(`devnamesp[SD_TO_DEVINFO(un)->devi_major].dn_name) /* ` */
/* DEV_INST(un) yields the devi_instance field of the devinfo node. */
#define DEV_INST(un) (SD_TO_DEVINFO(un)->devi_instance)
/* SD_GET_XBUF(bp) casts the b_private field of a buf to a struct sd_xbuf pointer. */
#define SD_GET_XBUF(bp) ((struct sd_xbuf *)((bp)->b_private))
/*
 * Print the CSV header once, before any probe output. The columns line up
 * with the row printed for each sd_retry_command entry.
 */
BEGIN
{
	printf("instance,ncmds,ncmds_transport,errno,nretries,failed_cmd,reset\n");
}
/*
 * For every entry into sd_retry_command, print one CSV row describing the
 * state of the unit (args[0]) and of the command's xbuf (reached through
 * args[1]) at that moment.
 */
::sd_retry_command:entry
{
	this->un = args[0];
	this->xbuf = SD_GET_XBUF(args[1]);

	/* Sixth argument; the original author expected this to always be EIO. */
	this->errno = args[5];

	/* Outstanding command counters kept in the soft state. */
	this->ncmds = this->un->un_ncmds_in_driver;
	this->ncmds_trans = this->un->un_ncmds_in_transport;

	/* Retries already recorded for this command. */
	this->retry_cnt = this->xbuf->xb_retry_count;

	/*
	 * Set once the retry count has reached the unit's busy retry count;
	 * reported under the failed_cmd column.
	 */
	this->exhausted =
	    this->xbuf->xb_retry_count >= this->un->un_busy_retry_count;

	/* The reset threshold is the unit's value, but never less than 2. */
	this->reset_threshold = (this->un->un_reset_retry_count >= 2) ?
	    this->un->un_reset_retry_count : 2;

	/*
	 * Set when the retry count sits exactly on the reset threshold. Per
	 * the original author, a reset should also be visible in the system
	 * log.
	 */
	this->reset = this->xbuf->xb_retry_count == this->reset_threshold;

	printf("sd%d,%d,%d,%d,%d,%s,%s\n",
	    DEV_INST(this->un),
	    this->ncmds,
	    this->ncmds_trans,
	    this->errno,
	    this->retry_cnt,
	    this->exhausted != 0 ? "y" : "n",
	    this->reset != 0 ? "y" : "n");
}
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
 * Helper macros.
 *
 * SD_TO_DEVINFO(un) follows un->un_sd->sd_dev from an sd soft-state pointer
 * and casts the result to a struct dev_info pointer.
 */
#define SD_TO_DEVINFO(un) ((struct dev_info *)((un)->un_sd->sd_dev))
/*
 * DEV_NAME(un) indexes the kernel devnamesp array by the devinfo node's
 * devi_major and converts the dn_name entry to a D string. The trailing
 * comment on the macro body carries a second backtick, presumably to keep
 * the backticks balanced for the preprocessor or for editors -- leave it in
 * place.
 */
#define DEV_NAME(un) \
stringof(`devnamesp[SD_TO_DEVINFO(un)->devi_major].dn_name) /* ` */
/* DEV_INST(un) yields the devi_instance field of the devinfo node. */
#define DEV_INST(un) (SD_TO_DEVINFO(un)->devi_instance)
/* SD_GET_XBUF(bp) casts the b_private field of a buf to a struct sd_xbuf pointer. */
#define SD_GET_XBUF(bp) ((struct sd_xbuf *)((bp)->b_private))
/*
 * YN(val) maps a flag to "y" (non-zero) or "n" (zero). Both the argument
 * and the whole expansion are parenthesized so the macro stays correct when
 * given a compound expression or used inside a larger one.
 */
#define YN(val) ((val) != 0 ? "y" : "n")
/*
 * The error code is read from a different argument of each traced function,
 * so it is captured per probe here: args[2] for
 * sd_return_failed_command_no_restart and args[5] for sd_retry_command.
 * The value is left in a clause-local variable for the shared reporting
 * clause that follows; these clauses therefore have to stay ahead of it.
 */
::sd_return_failed_command_no_restart:entry
{
	this->errno = args[2];
}

::sd_retry_command:entry
{
	this->errno = args[5];
}
/*
 * Shared reporting clause for both traced functions. Prints one timestamped
 * key=value line per probe firing, describing the unit (args[0]) and the
 * command's xbuf (reached through args[1]). this->errno is expected to have
 * been set already by the per-probe clauses that precede this one.
 */
::sd_return_failed_command_no_restart:entry,
::sd_retry_command:entry
{
	this->un = args[0];
	this->xbuf = SD_GET_XBUF(args[1]);

	/* Outstanding command counters kept in the soft state. */
	this->ncmds = this->un->un_ncmds_in_driver;
	this->ncmds_trans = this->un->un_ncmds_in_transport;

	/* Retries already recorded for this command. */
	this->retry_cnt = this->xbuf->xb_retry_count;

	/* Set once the retry count has reached the unit's busy retry count. */
	this->at_limit =
	    this->xbuf->xb_retry_count >= this->un->un_busy_retry_count;

	/* The reset threshold is the unit's value, but never less than 2. */
	this->reset_at = (this->un->un_reset_retry_count >= 2) ?
	    this->un->un_reset_retry_count : 2;

	/*
	 * Set when the retry count sits exactly on the reset threshold. Per
	 * the original author, a reset should also show up in the system
	 * log, and the timestamp printed below helps to correlate the two.
	 */
	this->reset = this->xbuf->xb_retry_count == this->reset_at;

	/*
	 * Removable-media and eject-support flags are reported so that units
	 * which are not plain disk drives (CDROMs and the like, according to
	 * the original author) can be told apart. The same flags can be
	 * inspected on a live system with mdb:
	 * > *sd_state::walk softstate|::print struct sd_lun un_f_has_removable_media un_f_eject_media_supported
	 */
	this->ejectable = this->un->un_f_eject_media_supported;
	this->removable = this->un->un_f_has_removable_media;

	printf("%Y instance=sd%d ejectable=%s removable=%s ncmds=%d ncmds_transport=%d errno=%d retries=%d retries_limit_reached=%s reset_triggered=%s probe=%s\n",
	    walltimestamp,
	    DEV_INST(this->un),
	    YN(this->ejectable),
	    YN(this->removable),
	    this->ncmds,
	    this->ncmds_trans,
	    this->errno,
	    this->retry_cnt,
	    YN(this->at_limit),
	    YN(this->reset),
	    probefunc);
}