#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2017 Sam Zaydel / RackTop Systems.
*
* io-retry-and-err-count-csv.d
*
* Description:
* Script collects a count of IOs that resulted in an error or retry.
* The error rate is multiplied by `multiplier` and reported with an
* `e-6` suffix, so no floating-point arithmetic, which dtrace does not
* support, is needed.
* The expectation is that if a device experiences an IO error each
* time it issues an IO, we should see 1000000e-6 in the output, meaning
* the rate of error is 1.0.
*/
/*
 * Scale factor applied to the error rate so that it can be kept as an
 * integer; the result is printed with an "e-6" suffix.
 */
inline const int multiplier = 1000000;

/* Per-device state, keyed by device number (b_edev). */
unsigned long ioct[dev_t];	/* IOs completed since the last rate sample */
unsigned long errct[dev_t];	/* of those, IOs completed with B_ERROR set */
unsigned long timer[dev_t];	/* timestamp of the last rate sample */
/*
 * Print the CSV header once at startup. The first two columns are the
 * aggregation keys; the remaining five match the aggregations printed by
 * the tick-1min clause.
 */
dtrace:::BEGIN
{
	printf("sdname,mpxiowwn,ctretry,cterr,cteio,noxfer,rateerr\n");
}
/*
 * Count entries into sd_set_retry_bp per device. args[1] is translated to
 * a devinfo_t to obtain the device names; devices whose pathname is empty
 * or "<nfs>" are skipped.
 */
::sd_set_retry_bp:entry
/ xlate <devinfo_t *>(args[1])->dev_pathname != "" &&
    xlate <devinfo_t *>(args[1])->dev_pathname != "<nfs>" /
{
	this->path = xlate <devinfo_t *>(args[1])->dev_pathname;
	this->name = xlate <devinfo_t *>(args[1])->dev_statname;

	/*
	 * Take the 16 characters starting at offset 25 of the pathname.
	 * NOTE(review): presumably this is where the MPxIO WWN sits (the
	 * CSV column is labelled "mpxiowwn") -- confirm against real paths.
	 * A slice containing "disk@" is reported as "NA" instead.
	 */
	this->frag = substr(this->path, 25, 16);
	this->wwn = (strstr(this->frag, "disk@") != 0 &&
	    strstr(this->frag, "disk@") != "") ? "NA" : toupper(this->frag);

	@ctretry[this->name, this->wwn] = count();
}
/*
 * Seed the per-device sample timer the first time an IO is seen starting
 * on a device (i.e. while its timer slot is still zero). Devices whose
 * pathname is empty or "<nfs>" are skipped.
 */
io:::start
/ args[1]->dev_pathname != "" && args[1]->dev_pathname != "<nfs>" &&
    !timer[args[0]->b_edev] /
{
	timer[args[0]->b_edev] = timestamp;
}
/*
 * Count every completed IO per device, and separately count those that
 * completed with B_ERROR set. These counters feed the error-rate
 * calculation in the io:::done clause that follows.
 */
io:::done
/ args[1]->dev_pathname != "" && args[1]->dev_pathname != "<nfs>" /
{
	this->dev = args[0]->b_edev;
	ioct[this->dev] += 1;
	errct[this->dev] += (args[0]->b_flags & B_ERROR) ? 1 : 0;
}
/*
 * Sample the per-device error rate. Fires on an IO completion once the
 * device's timer has been seeded and lies more than 10000000000 ns (10 s)
 * behind the current timestamp. The scaled rate is folded into
 * @maxrateerr and the per-device counters are reset for the next sample.
 */
io:::done
/ args[1]->dev_pathname != "" && args[1]->dev_pathname != "<nfs>" &&
    timer[args[0]->b_edev] != 0 &&
    timer[args[0]->b_edev] < timestamp - 10000000000 /
{
	this->dev = args[0]->b_edev;
	timer[this->dev] = timestamp;

	this->name = args[1]->dev_statname;
	/* Same pathname slice and "NA" substitution as the other clauses. */
	this->frag = substr(args[1]->dev_pathname, 25, 16);
	this->wwn = (strstr(this->frag, "disk@") != 0 &&
	    strstr(this->frag, "disk@") != "") ? "NA" : toupper(this->frag);

	/*
	 * Integer stand-in for a floating-point ratio: errors per IO scaled
	 * by `multiplier`. The earlier io:::done clause has already counted
	 * the current IO, so ioct is at least 1 here.
	 */
	this->rate = (multiplier * errct[this->dev]) / ioct[this->dev];
	@maxrateerr[this->name, this->wwn] = max(this->rate);

	/* Start a fresh sampling window. */
	ioct[this->dev] = 0;
	errct[this->dev] = 0;
}
/*
 * Record details for IOs that completed with B_ERROR set: how many there
 * were, how many of them carried EIO, and the sum of their b_resid values.
 */
io:::done
/ args[1]->dev_pathname != "" && args[1]->dev_pathname != "<nfs>" &&
    (args[0]->b_flags & B_ERROR) /
{
	this->name = args[1]->dev_statname;
	/* Same pathname slice and "NA" substitution as the other clauses. */
	this->frag = substr(args[1]->dev_pathname, 25, 16);
	this->wwn = (strstr(this->frag, "disk@") != 0 &&
	    strstr(this->frag, "disk@") != "") ? "NA" : toupper(this->frag);

	/*
	 * The predicate guarantees B_ERROR is set, so each firing adds
	 * exactly one to cterr. Any difference between cterr and cteio
	 * means not all errors are EIO.
	 */
	@cterr[this->name, this->wwn] = count();
	@cteio[this->name, this->wwn] =
	    sum((args[0]->b_error == EIO) ? 1 : 0);
	@noxfer[this->name, this->wwn] = sum(args[0]->b_resid);
}
/*
 * Once a minute, print one CSV row per key (sdname, mpxiowwn) across all
 * five aggregations, then clear them so each row covers a single interval.
 * The last column is printed with an "e-6" suffix.
 */
profile:::tick-1min
{
	printa("%s,%s,%@d,%@d,%@d,%@d,%@de-6\n",
	    @ctretry, @cterr, @cteio, @noxfer, @maxrateerr);

	trunc(@ctretry);
	trunc(@cterr);
	trunc(@cteio);
	trunc(@noxfer);
	trunc(@maxrateerr);
}