[NFS Observation dtrace] Various performance and metrics measurements, such as IO sizes, counts, IOPS, and random/sequential checks for NFSv3/4 #tags: nfs, nfsv3, nfsv4, offsets, file offsets, random, sequential, dtrace, illumos, iosize, nfs IOPS
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2015 RackTop Systems.
*
* nfsv3rwbyfilecsv.d
 * This script traces NFSv3 Read and Write IO, reporting each file touched by
 * name (when known), kilobytes/sec of IO against that file averaged over an
 * `interval`-second sample, and the count of read and write operations done.
 * We cannot tell from this where a request originated, but we can tell what
 * was accessed and how much work was done.
*
* Results should look similar to the following:
* [timestamp],[command],[path],[rkb/s],[reads],[wkb/s],[writes]
*
* 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
* 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
* 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
* 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
*/
BEGIN {
printf("%s,%s,%s,%s,%s,%s,%s\n", "timestamp", "cmd", "pathname",
"read.kb/sec","read.count", "write.kb/sec", "write.count");
interval = 5;
command["op-read-done"] = "READ";
command["op-write-done"] = "WRITE";
ts = walltimestamp;
tick = interval;
}
nfsv3:::op-read-done /ts/ {
this->bytes = args[2]->res_u.ok.data.data_len;
@readCnt[ts, command[probename],
args[1]->noi_curpath] = count();
@reads[ts, command[probename],
args[1]->noi_curpath] = sum(this->bytes);
}
nfsv3:::op-write-done /ts/ {
this->bytes = args[2]->res_u.ok.count;
@writeCnt[ts, command[probename],
args[1]->noi_curpath] = count();
@writes[ts, command[probename],
args[1]->noi_curpath] = sum(this->bytes);
}
profile:::tick-1sec, END
/tick-- == 0/ {
/* Normalize data to report kilobytes/sec for each file. */
normalize(@reads, 1024 * interval);
normalize(@writes, 1024 * interval);
normalize(@readCnt, interval);
normalize(@writeCnt, interval);
printa("%Y,%s,%s,%@d,%@d,%@d,%@d\n", @reads, @readCnt, @writes, @writeCnt);
trunc(@reads);
trunc(@readCnt);
trunc(@writes);
trunc(@writeCnt);
tick = interval;
ts = walltimestamp;
}
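// Usage sketch (assumption: the script above is saved under the name given in its
// header, nfsv3rwbyfilecsv.d, and made executable); output is CSV, so redirecting
// it to a file and stopping collection with Ctrl-C is convenient:
chmod +x nfsv3rwbyfilecsv.d
./nfsv3rwbyfilecsv.d > nfsv3-rw-by-file.csv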
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2015 RackTop Systems.
*
* nfsv3iobyconncsv.d
 * This script displays, per client connection (i.e. per unique IP address),
 * the amount of NFSv3 Read and Write IO in KB/sec, the number of reads and
 * writes per second, and the total number of operations per second,
 * including reads, writes and any metadata operations.
*/
dtrace:::BEGIN
{
ts = walltimestamp;
interval = 5;
/* printf("Tracing... Interval %d secs.\n", interval); */
printf("%s,%s,%s,%s,%s,%s\n", "client.ip.addr", "reads/sec",
"writes/s", "kbr/s", "kbw/s", "metadata.ops/s");
tick = interval;
}
/* Metadata Operations not counted towards R/W IOs */
nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
/ts/ {
@ops[ts, args[0]->ci_remote] = count();
@ops[ts, "ALL_CLIENTS"] = count();
}
nfsv3:::op-read-done
/ts/ {
@reads[ts, args[0]->ci_remote] = count();
@readbytes[ts, args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len);
@reads[ts, "ALL_CLIENTS"] = count();
@readbytes[ts, "ALL_CLIENTS"] = sum(args[2]->res_u.ok.data.data_len);
}
nfsv3:::op-write-done
/ts/ {
@writes[ts, args[0]->ci_remote] = count();
@writebytes[ts, args[0]->ci_remote] = sum(args[2]->res_u.ok.count);
@writes[ts, "ALL_CLIENTS"] = count();
@writebytes[ts, "ALL_CLIENTS"] = sum(args[2]->res_u.ok.count);
}
profile:::tick-1sec,END
/tick-- == 0/
{
normalize(@ops, interval);
normalize(@reads, interval);
normalize(@writes, interval);
normalize(@writebytes, 1024 * interval);
normalize(@readbytes, 1024 * interval);
printa("%Y,%s,%@d,%@d,%@d,%@d,%@d\n", @reads, @writes,
@readbytes, @writebytes, @ops);
trunc(@ops);
trunc(@reads);
trunc(@writes);
trunc(@readbytes);
trunc(@writebytes);
tick = interval;
ts = walltimestamp;
}
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2016 RackTop Systems.
*
* nfsv3iobyconncsv.d
 * This script displays, per client connection (i.e. per unique IP address)
 * and for the ALL_CLIENTS aggregate, the amount of NFSv3 Read and Write IO
 * in KB/sec, the number of reads and writes per second, and the total number
 * of operations per second, including reads, writes and any metadata operations.
*/
inline int INTERVAL = 5;
dtrace:::BEGIN
{
printf("%s,%s,%s,%s,%s,%s,%s\n", "client.ip.addr", "timedelta",
"reads/sec", "writes/s", "kbr/s", "kbw/s", "metadata.ops/s");
tick = INTERVAL;
start_time = timestamp;
ts = 0;
}
/* Metadata Operations not counted towards R/W IOs */
nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
{
@ops[args[0]->ci_remote] = count();
@ops["ALL_CLIENTS"] = count();
@t[args[0]->ci_remote] = max(ts);
@t["ALL_CLIENTS"] = max(ts);
}
nfsv3:::op-read-done
{
@reads[args[0]->ci_remote] = count();
@readbytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len);
@reads["ALL_CLIENTS"] = count();
@readbytes["ALL_CLIENTS"] = sum(args[2]->res_u.ok.data.data_len);
@t[args[0]->ci_remote] = max(ts);
@t["ALL_CLIENTS"] = max(ts);
}
nfsv3:::op-write-done
{
@writes[args[0]->ci_remote] = count();
@writebytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.count);
@writes["ALL_CLIENTS"] = count();
@writebytes["ALL_CLIENTS"] = sum(args[2]->res_u.ok.count);
@t[args[0]->ci_remote] = max(ts);
@t["ALL_CLIENTS"] = max(ts);
}
profile:::tick-1sec,END
/tick-- == 0/
{
normalize(@ops, INTERVAL);
normalize(@reads, INTERVAL);
normalize(@writes, INTERVAL);
normalize(@writebytes, 1024 * INTERVAL);
normalize(@readbytes, 1024 * INTERVAL);
printa("%s,%@d,%@d,%@d,%@d,%@d,%@d\n", @t, @reads, @writes,
@readbytes, @writebytes, @ops);
tick = INTERVAL;
ts = (timestamp - start_time) / 1000000000;
trunc(@ops);
trunc(@reads);
trunc(@writes);
trunc(@readbytes);
trunc(@writebytes);
trunc(@t);
}
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2015 RackTop Systems.
*
* nfsv3cmds.d
 * This script displays the amount of NFSv3 IO and all operations, with their
 * counts, by client connection, i.e. by unique IP address.
*/
dtrace:::BEGIN
{
interval = 10;
tick = interval;
/* All known NFSv3 commands */
commands["rfs3_commit"] = "COMMIT";
commands["rfs3_pathconf"] = "PATHCONF";
commands["rfs3_fsinfo"] = "FSINFO";
commands["rfs3_fsstat"] = "FSSTAT";
commands["rfs3_readdirplus"] = "READ_DIR+";
commands["rfs3_readdir"] = "READ_DIR";
commands["rfs3_link"] = "LINK";
commands["rfs3_rename"] = "RENAME";
commands["rfs3_rmdir"] = "RMDIR";
commands["rfs3_remove"] = "REMOVE";
commands["rfs3_mknod"] = "MKNODE";
commands["rfs3_symlink"] = "SYMLINK";
commands["rfs3_mkdir"] = "MKDIR";
commands["rfs3_create"] = "CREATE";
commands["rfs3_write"] = "WRITE";
commands["rfs3_read"] = "READ";
commands["rfs3_readlink"] = "READ_LINK";
commands["rfs3_access"] = "ACCESS";
commands["rfs3_lookup"] = "LOOKUP";
commands["rfs3_setattr"] = "SET_ATTR";
commands["rfs3_getattr"] = "GET_ATTR";
commands["rpc_null_v3"] = "NULL";
}
nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
{
@ops[args[0]->ci_remote, commands[probefunc]] = count();
}
nfsv3:::op-read-done
{
@readbytes[args[0]->ci_remote,
args[2]->res_u.ok.count] = sum(args[2]->res_u.ok.data.data_len);
}
nfsv3:::op-write-done
{
@writebytes[args[0]->ci_remote,
args[2]->res_u.ok.count] = sum(args[2]->res_u.ok.count);
}
profile:::tick-1sec
/tick-- == 0/
{
this->ts = walltimestamp;
printf("---- Begin IO Byte Counts [%Y] ----\n", this->ts);
printa("Client[R]: %s IO Size: %d Total bytes: %@d\n", @readbytes);
printa("Client[W]: %s IO Size: %d Total bytes: %@d\n", @writebytes);
printf("---- END Byte Counts ----\n\n");
printf("\n---- Begin Operations [%Y] ----\n", this->ts);
printa("Client: %s CMD: %s OP.Count: %@d\n", @ops);
printf("---- END Operations ----\n\n");
trunc(@readbytes);
trunc(@writebytes);
tick = interval;
}
dtrace -qn '
/*
* Register when a request is received from client
*/
nfsv3:::op-read-start,nfsv3:::op-write-start {
self->x[args[1]->noi_xid] = timestamp;
self->caddr[args[1]->noi_xid] = args[0]->ci_remote;
}
/*
 * Register when the server completes the operation and replies to the client
*/
nfsv3:::op-read-done,nfsv3:::op-write-done /self->x[args[1]->noi_xid]/ {
this->delta = timestamp - self->x[args[1]->noi_xid];
@[self->caddr[args[1]->noi_xid]] = quantize(this->delta / 1000);
self->x[args[1]->noi_xid] = 0;
self->caddr[args[1]->noi_xid] = 0;
}'
// Same as above, but splits commands into two histograms, one for reads and one for writes.
dtrace -qn '
string op[string];
BEGIN {
op["op-read-done"] = "R";
op["op-write-done"] = "W";
}
/*
* Register when a request is received from client
*/
nfsv3:::op-read-start,nfsv3:::op-write-start {
self->x[args[1]->noi_xid] = timestamp;
self->caddr[args[1]->noi_xid] = args[0]->ci_remote;
}
/*
 * Register when the server completes the operation and replies to the client
*/
nfsv3:::op-read-done,nfsv3:::op-write-done /self->x[args[1]->noi_xid]/ {
this->delta = timestamp - self->x[args[1]->noi_xid];
@[op[probename], self->caddr[args[1]->noi_xid]] = quantize(this->delta / 1000);
self->x[args[1]->noi_xid] = 0;
self->caddr[args[1]->noi_xid] = 0;
}'
dtrace -qCs <( print -r '
/*
 * This script produces buckets of IO sizes, grouped by Random and Sequential
 * access. Up to four histograms are generated, two each for the Read and
 * Write commands: one covering all sequential IOs and one covering all
 * non-sequential IOs.
 */
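/*
 * Worked example of the rule used below (illustrative numbers): a read at
 * offset 0 with count 8192 sets last_off=0 and last_count=8192; a follow-up
 * read at offset 8192 satisfies last_off + last_count == offset and lands in
 * the SEQ histogram, while a follow-up read at offset 65536 lands in RAND.
 */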
#include <nfs/nfs.h>
offset3 last_off[string];
count3 last_count[string];
int ok_r, ok_w;
::rfs3_read:entry /last_count["read"] == 0/ {
last_off["read"] = args[0]->offset ;
last_count["read"] = args[0]->count ;
ok_r = 1;
}
::rfs3_read:entry /ok_r/ {
this->offset = args[0]->offset ;
@rd[ last_off["read"] + last_count["read"] == this->offset ?
"SEQ" : "RAND"] = quantize(args[0]->count);
/* Update last offset and count with latest value. */
last_off["read"] = this->offset ;
last_count["read"] = args[0]->count ;
}
::rfs3_write:entry /last_count["write"] == 0/ {
last_off["write"] = args[0]->offset ;
last_count["write"] = args[0]->count ;
ok_w = 1;
}
::rfs3_write:entry /ok_w/ {
this->offset = args[0]->offset ;
@wr[ last_off["write"] + last_count["write"] == this->offset ?
"SEQ" : "RAND"] = quantize(args[0]->count);
/* Update last offset and count with latest value. */
last_off["write"] = this->offset ;
last_count["write"] = args[0]->count ;
}
END {
printa("\t[%s] Read(bytes): %@d\n", @rd);
printa("\t[%s] Write(bytes): %@d\n", @wr);
}'
)
/usr/sbin/dtrace -qn'
#pragma D option aggsize=16m
#pragma D option bufsize=8m
#pragma D option dynvarsize=8m
string typ[string];
BEGIN
{
printf("source,cmd,path,IOs,avg.lat.ns,bytes\n");
typ["op-access-done"] = "access";
typ["op-commit-done"] = "commit";
typ["op-create-done"] = "create";
typ["op-fsinfo-done"] = "fsinfo";
typ["op-fsstat-done"] = "fsstat";
typ["op-getattr-done"] = "getattr";
typ["op-link-done"] = "link";
typ["op-lookup-done"] = "lookup";
typ["op-mkdir-done"] = "mkdir";
typ["op-mknod-done"] = "mknod";
typ["op-null-done"] = "null";
typ["op-pathconf-done"] = "pathconf";
typ["op-read-done"] = "read";
typ["op-readdir-done"] = "readdir";
typ["op-readdirplus-done"] = "readdirplus";
typ["op-readlink-done"] = "readlink";
typ["op-remove-done"] = "remove";
typ["op-rename-done"] = "rename";
typ["op-rmdir-done"] = "rmdir";
typ["op-setattr-done"] = "setattr";
typ["op-symlink-done"] = "symlink";
typ["op-write-done"] = "write";
}
nfsv3:::op-access-start,
nfsv3:::op-commit-start,
nfsv3:::op-create-start,
nfsv3:::op-fsinfo-start,
nfsv3:::op-fsstat-start,
nfsv3:::op-getattr-start,
nfsv3:::op-link-start,
nfsv3:::op-lookup-start,
nfsv3:::op-mkdir-start,
nfsv3:::op-mknod-start,
nfsv3:::op-null-start,
nfsv3:::op-pathconf-start,
nfsv3:::op-read-start,
nfsv3:::op-readdir-start,
nfsv3:::op-readdirplus-start,
nfsv3:::op-readlink-start,
nfsv3:::op-remove-start,
nfsv3:::op-rename-start,
nfsv3:::op-rmdir-start,
nfsv3:::op-setattr-start,
nfsv3:::op-symlink-start,
nfsv3:::op-write-start
{
self->src_addr = args[0]->ci_remote;
tm[self->src_addr, args[1]->noi_xid] = timestamp;
self->have_path = args[1]->noi_curpath == NULL ? 0 : 1;
/* We should make sure we handle empty value as well */
self->path = args[1]->noi_curpath != "" ?
args[1]->noi_curpath : "NOPATH";
}
nfsv3:::op-create-start,
nfsv3:::op-mkdir-start,
nfsv3:::op-mknod-start
/ self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
{
self->path = strjoin(strjoin(self->path, "/"), args[2]->where.name)
}
nfsv3:::op-lookup-start
/ self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
{
self->path = strjoin(strjoin(self->path, "/"), args[2]->what.name)
}
nfsv3:::op-remove-start,
nfsv3:::op-rmdir-start
/ self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
{
self->path = strjoin(strjoin(self->path, "/"), args[2]->object.name)
}
nfsv3:::op-rename-start
/ self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
{
self->path = strjoin(strjoin(self->path, "/"), args[2]->from.name)
}
nfsv3:::op-symlink-start
/ self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
{
self->path = strjoin(strjoin(self->path, "/"), args[2]->symlink.symlink_data)
}
nfsv3:::op-read-done,
nfsv3:::op-write-done
/ self->have_path && tm[self->src_addr, args[1]->noi_xid] /
{
self->bytes = args[2]->res_u.ok.count ;
@ios[self->src_addr, typ[probename], self->path] = count();
@lat_us[self->src_addr, typ[probename], self->path] =
avg(timestamp - tm[self->src_addr, args[1]->noi_xid]);
@s[self->src_addr, typ[probename], self->path] = sum(self->bytes);
++data;
}
nfsv3:::op-access-done,
nfsv3:::op-commit-done,
nfsv3:::op-create-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-getattr-done,
nfsv3:::op-link-done,
nfsv3:::op-lookup-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-mknod-done,
nfsv3:::op-null-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-readdir-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readlink-done,
nfsv3:::op-remove-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-setattr-done,
nfsv3:::op-symlink-done
/ self->have_path && tm[self->src_addr, args[1]->noi_xid] /
{
@ios[self->src_addr, typ[probename], self->path] = count();
@lat_us[self->src_addr, typ[probename], self->path] =
avg(timestamp - tm[self->src_addr, args[1]->noi_xid]);
@s[self->src_addr, typ[probename], self->path] = sum(0);
++data;
}
nfsv3:::op-access-done,
nfsv3:::op-commit-done,
nfsv3:::op-create-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-getattr-done,
nfsv3:::op-link-done,
nfsv3:::op-lookup-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-mknod-done,
nfsv3:::op-null-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-read-done,
nfsv3:::op-readdir-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readlink-done,
nfsv3:::op-remove-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-setattr-done,
nfsv3:::op-symlink-done,
nfsv3:::op-write-done
{
/* Zero out things to make sure we avoid issues with dynvar drops */
self->bytes = 0;
self->have_path = 0;
tm[self->src_addr, args[1]->noi_xid] = 0;
self->src_addr = 0;
self->path = 0;
}
tick-1sec /data/
{
/*
 * Shrink each aggregation to its top 100 entries first, then print those
 * entries. This does not mean we are limited to 100 unique connection
 * addresses or 100 unique paths: each aggregation is keyed by a tuple of
 * three values, client address, command and path, so in theory we could
 * have (client address) x (command) x (path) unique keys.
 */
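/*
 * Illustrative arithmetic (hypothetical numbers): 20 client addresses x 22 op
 * types x 50 distinct paths would allow up to 22,000 keys per interval, hence
 * the clip to the top 100 entries per aggregation below.
 */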
trunc(@ios, 100); trunc(@lat_us, 100); trunc(@s, 100);
/* normalize(@lat_us, data); */
printa("%s,%s,%s,%@d,%@d,%@d\n", @ios, @lat_us, @s);
trunc(@ios); trunc(@lat_us); trunc(@s);
data = 0;
}'
dtrace -qn '
BEGIN {
x["op-read-start"] = "Read" ;
x["op-write-start"] = "Write" ;
}
nfsv3:::op-read-start { self->offs = args[2]->offset ; self->sz = args[2]->count ; }
nfsv3:::op-write-start { self->offs = args[2]->offset ; self->sz = args[2]->data.data_len ; }
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz <= 0x1000 && self->offs/ {
@[x[probename], "4K"] = quantize(self->offs);
self->offs = 0 ; self->sz = 0 ;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz > 0x1000 && self->sz <= 0x2000 && self->offs/ {
@[x[probename], "8K"] = quantize(self->offs);
self->offs = 0 ; self->sz = 0 ;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz > 0x2000 && self->sz <= 0x4000 && self->offs/ {
@[x[probename], "16K"] = quantize(self->offs);
self->offs = 0 ; self->sz = 0 ;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz > 0x4000 && self->sz <= 0x8000 && self->offs/ {
@[x[probename], "32K"] = quantize(self->offs);
self->offs = 0 ; self->sz = 0 ;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz > 0x8000 && self->offs/ {
@[x[probename], ">32K"] = quantize(self->offs);
self->offs = 0 ; self->sz = 0 ;
}
END {
printa("[%s] IOsize(%s): %@d\n", @);
}'
dtrace -qn '
long ios_per_sec[string];
long prev_timestamp[string];
inline const int NSEC_PER_SEC = 1000000000;
BEGIN {
start = timestamp ;
prev_timestamp["op-read-start"] = start;
prev_timestamp["op-write-start"] = start;
}
nfsv3:::op-read-start {
self->sz = args[2]->count;
self->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;
}
nfsv3:::op-write-start {
self->sz = args[2]->data.data_len;
self->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/self->sz && self->ipaddr != NULL/ {
this->dir = probename == "op-read-start" ? "READ" : "WRITE" ;
@bukit[self->ipaddr, this->dir] = quantize(self->sz);
@tot[self->ipaddr, this->dir] = sum(self->sz);
@avsz[self->ipaddr, this->dir] = avg(self->sz);
@minsz[self->ipaddr, this->dir] = min(self->sz);
@maxsz[self->ipaddr, this->dir] = max(self->sz);
@ct[self->ipaddr, this->dir] = count();
self->sz = 0;
self->ipaddr = 0;
ioct[this->dir]++ ;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/timestamp >= (prev_timestamp[probename] + 1000000000) / {
this->now = timestamp ;
this->delta = this->now - prev_timestamp[probename];
this->dir = probename == "op-read-start" ? "READ" : "WRITE" ;
this->perSec = (ios_per_sec[this->dir] * NSEC_PER_SEC) / this->delta ;
prev_timestamp[probename] = this->now ; /* update timestamp for next run */
@ios_dist[this->dir, "Second"] = quantize(ioct[this->dir]);
ioct[this->dir] = 0; /* reset counter for next second */
}
END {
this->stop = timestamp;
normalize(@tot, 1024*1024);
normalize(@maxsz, 1024);
normalize(@avsz, 1024);
printa("\t IOsize(b)\t\tClient=%s Op=%s%@d\n", @bukit);
printf("\n----------------------------------------\n\n");
printa(" %-5s IOs per %s%@d\n", @ios_dist);
printf("----------------------------------------\n\n");
printa("%-15s %s total(MB)=%@d min(b)=%@d max(KB)=%@d avg(KB)=%@d issued=%@d \n",
@tot, @minsz, @maxsz, @avsz, @ct);
printf("--------------------\nTotal Runtime: %d seconds\n",
(this->stop - start)/ NSEC_PER_SEC);
}'
// NFSv3 Read/Write offsets by type of IO (R/W), quantized.
dtrace -qn '
BEGIN {
op["op-read-start"] = "READ Offset(KB)";
op["op-write-start"] = "WRITE Offset(KB)";
printf("Tracing file offsets... Ctrl-C to End\n");
}
nfsv3:::op-read-start,
nfsv3:::op-write-start {
@[op[probename]] = quantize(args[2]->offset >> 10)
}
END {
printa(@);
}'
#!/usr/bin/ksh
#
# show effect of alignment
#
### default variables
opt_blocksize=4096 # blocksize for alignment measurements
### process options
while getopts b:h name
do
case $name in
b) opt_blocksize=$OPTARG ;;
h) cat <<END >&2
USAGE: nfsalignment [-b blocksize]
-b blocksize # alignment blocksize (default=${opt_blocksize})
END
exit 1
esac
done
dtrace -qn '
inline int OPT_blocksize = '$opt_blocksize';
inline int READ_ALIGNED = 0;
inline int WRITE_ALIGNED = 1;
inline int READ_UNALIGNED = 2;
inline int WRITE_UNALIGNED = 3;
int self align_cnt[int];
dtrace:::BEGIN
{
printf("tracing... press [ctrl]+C when done\n");
rw["op-read-start"] = "R";
rw["op-write-start"] = "W";
}
nfsv3:::op-read-start,
nfsv3:::op-write-start,
nfsv4:::op-read-start,
nfsv4:::op-write-start
{
/* Use self-> (thread-local) variables so values survive until the -done probe. */
self->ts = timestamp;
self->align = args[2]->offset % OPT_blocksize;
/* todo: option for offset normalization */
/* self->align = self->align > 2048 ? 4096 - self->align
   : self->align;
*/
}
nfsv3:::op-read-start,
nfsv4:::op-read-start
{
this->aligned = args[2]->offset % OPT_blocksize == 0 ? 1 : 0;
this->unaligned = args[2]->offset % OPT_blocksize != 0 ? 1 : 0;
self->align_cnt[READ_ALIGNED] += this->aligned;
self->align_cnt[READ_UNALIGNED] += this->unaligned;
}
nfsv3:::op-write-start,
nfsv4:::op-write-start
{
this->aligned = args[2]->offset % OPT_blocksize == 0 ? 1 : 0;
this->unaligned = args[2]->offset % OPT_blocksize != 0 ? 1 : 0;
self->align_cnt[WRITE_ALIGNED] += this->aligned;
self->align_cnt[WRITE_UNALIGNED] += this->unaligned;
}
/* direction of I/O */
nfsv3:::op-read-start,
nfsv4:::op-read-start,
nfsv3:::op-write-start,
nfsv4:::op-write-start
{
self->rw = rw[probename];
self->fh = args[1]->noi_curpath;
}
/* version of I/O */
nfsv3:::op-read-start,
nfsv3:::op-write-start
{
self->vers = "3";
}
nfsv4:::op-read-start,
nfsv4:::op-write-start
{
self->vers = "4";
}
nfsv3:::op-read-done,
nfsv3:::op-write-done,
nfsv4:::op-read-done,
nfsv4:::op-write-done
/self->ts && self->rw != NULL/
{
@t[self->rw, self->vers, self->align, self->fh] =
avg((timestamp - self->ts)/1000);
@c[self->rw, self->vers, self->align, self->fh] = count();
self->ts = 0;
/* Aligned IOs Count */
@a[self->rw, self->vers] = sum(self->rw == "R" ?
self->align_cnt[READ_ALIGNED] :
self->align_cnt[WRITE_ALIGNED]);
/* Unaligned IOs Count */
@u[self->rw, self->vers] = sum(self->rw == "R" ?
self->align_cnt[READ_UNALIGNED] :
self->align_cnt[WRITE_UNALIGNED]);
self->align_cnt[READ_ALIGNED] = 0;
self->align_cnt[READ_UNALIGNED] = 0;
self->align_cnt[WRITE_ALIGNED] = 0;
self->align_cnt[WRITE_UNALIGNED] = 0;
}
dtrace:::END
{
printf("NFS alignment analysis for blocksize=%d\n", OPT_blocksize);
printf("Sample end time %Y\n", walltimestamp);
/* Report on total aligned and unaligned I/Os */
printa("(%s) Count I/Os Aligned NFSv%s => %@d\n", @a);
printa("(%s) Count I/Os Unaligned NFSv%s => %@d\n", @u);
printf("%8s %8s %20s %10s %20s %20s\n",
"r/w", "version", "misalignment (bytes)","Count", "Avg_t (us)", "Path");
printa("%8s %8s %20d %10@d %20@d %20s\n", @c, @t);
}'
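// Usage sketch (assumption: the wrapper above is saved as nfsalignment and made
// executable); the alignment blocksize defaults to 4096 bytes and can be changed
// with -b; the report is printed when tracing is stopped with Ctrl-C:
./nfsalignment -b 8192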
dtrace -qn '
BEGIN {
x["op-read-start"] = "Read" ;
x["op-write-start"] = "Write" ;
}
nfsv3:::op-read-start { self->sz = args[2]->count }
nfsv3:::op-write-start { self->sz = args[2]->data.data_len }
nfsv3:::op-read-start,nfsv3:::op-write-start /self->sz/ {
@[x[probename]] = quantize(self->sz);
self->sz = 0;
}
END {
printa("[%s] IOsize(b): %@d\n", @);
}'
// Observe, by file name, the distribution of offsets being seeked within a file.
// This is useful for observing VMs. Offsets are reported in KB from the start of the file.
dtrace -qn 'nfsv3:::op-read-start,nfsv3:::op-write-start {
@[args[1]->noi_curpath] = quantize(args[2]->offset >> 10)
}'
// Observe offsets being seeked within a file, grouped by IO size.
// This is useful when a load generation tool is issuing a known IO size.
dtrace -qn '
BEGIN {
m["op-read-start"] = "R" ;
m["op-write-start"] = "W" ;
}
nfsv3:::op-read-start {self->sz = args[2]->count}
nfsv3:::op-write-start {self->sz = args[2]->data.data_len}
nfsv3:::op-read-start,nfsv3:::op-write-start /self->sz/ {
/* @[args[1]->noi_curpath] = quantize(args[2]->offset >> 10); */
@[m[probename], self->sz] = quantize(args[2]->offset >> 10);
self->sz = 0;
}
END {
trunc(@, 5);
printa("[%s] IOsize: %d bytes\nOffset Distance(KB):%@d\n", @);
}'
dtrace -qn '
#pragma D option aggsize=16m
#pragma D option dynvarsize=38m
int self lastR[string];
int self lastW[string];
BEGIN {
cmd["op-read-start"] = "READ";
cmd["op-write-start"] = "WRITE";
cmd["op-read-done"] = "READ";
cmd["op-write-done"] = "WRITE";
reads = 0;
writes = 0;
printf("timestamp,cmd,path,iosize,avg.offset.kb,min.offset.kb,max.offset.kb,elapsed.us\n");
}
tick-1sec {
ts = walltimestamp;
}
/*
 * The value we store in this->distance is the number of bytes skipped from
 * the end of the previous IO, i.e. from the previous offset plus the number
 * of bytes read or written at that offset. If the last offset was, say, 2048
 * with a 1KB IO size, we measure the jump distance between (2048 + 1024) and
 * the new offset. Jump distance is therefore not strictly the distance
 * between two consecutive offsets, but the distance from the last actual
 * cursor position left by the previous read or write.
 */
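/*
 * Worked example (illustrative numbers): a previous read at offset 2048 with a
 * 1KB IO leaves the cursor at 3072; a new read at offset 8192 is recorded as a
 * jump of 8192 - 3072 = 5120 bytes, and a new read at offset 1024 as a jump of
 * 3072 - 1024 = 2048 bytes (distances are always recorded as positive values).
 */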
nfsv3:::op-read-start /ts/ {
/* self->len and self->relt must survive until the matching -done probe. */
self->len = args[2]->count; /* how much to read */
this->distance = self->lastR[args[1]->noi_curpath] == NULL ? args[2]->offset :
self->lastR[args[1]->noi_curpath] > args[2]->offset ?
self->lastR[args[1]->noi_curpath] - args[2]->offset :
-self->lastR[args[1]->noi_curpath] + args[2]->offset;
/* Preserve current offset, after self->len bytes read */
self->lastR[args[1]->noi_curpath] = args[2]->offset + self->len;
@avgjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
avg(this->distance);
@minjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
min(this->distance);
@maxjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
max(this->distance);
/* @sdjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] = stddev(this->distance); */
this->distance = 0;
self->relt = timestamp;
}
nfsv3:::op-write-start /ts/ {
self->len = args[2]->data.data_len; /* how much to write */
this->distance = self->lastW[args[1]->noi_curpath] == NULL ? args[2]->offset :
self->lastW[args[1]->noi_curpath] > args[2]->offset ?
self->lastW[args[1]->noi_curpath] - args[2]->offset :
-self->lastW[args[1]->noi_curpath] + args[2]->offset;
/* Preserve current offset, after self->len bytes written */
self->lastW[args[1]->noi_curpath] = args[2]->offset + self->len;
@avgjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
avg(this->distance);
@minjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
min(this->distance);
@maxjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] =
max(this->distance);
/* @sdjmp[ts, cmd[probename], args[1]->noi_curpath, self->len] = stddev(this->distance); */
this->distance = 0;
self->relt = timestamp;
}
nfsv3:::op-read-done,nfsv3:::op-write-done /self->relt/ {
/* Measure average amount of time from start of request to finish. */
@avglat[ts, cmd[probename], args[1]->noi_curpath, self->len] =
avg((timestamp - self->relt) / 1000);
self->relt = 0;
self->len = 0;
}
tick-1sec {
normalize(@avgjmp, 1024);
normalize(@minjmp, 1024);
normalize(@maxjmp, 1024);
printa("%Y,%s,%s,%d,%@d,%@d,%@d,%@d\n",
@avgjmp, @minjmp, @maxjmp, @avglat);
trunc(@minjmp); trunc(@maxjmp);
trunc(@avgjmp); trunc(@avglat);
/* trunc(@sdjmp); */
}'
dtrace -qn '
/*
* Measure how random or sequential NFSv3 IO is, and bucket it by
* client IP address. It is useful to know which sizes are dominant
 * when IOs keep a consistent offset distance from the previous offset,
 * i.e. IO is sequential, as opposed to which sizes have constantly
 * varying IO distances.
*/
long offset[string, string, string];
BEGIN {
map["op-read-start"] = "Read";
map["op-write-start"] = "Write";
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/!offset[map[probename], args[0]->ci_remote, args[1]->noi_curpath]/ {
offset[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = args[2]->offset;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/offset[map[probename], args[0]->ci_remote,
args[1]->noi_curpath] != args[2]->offset/ {
this->dist = args[2]->offset > offset[map[probename],
args[0]->ci_remote, args[1]->noi_curpath] ?
args[2]->offset - offset[map[probename],
args[0]->ci_remote, args[1]->noi_curpath] :
offset[map[probename], args[0]->ci_remote,
args[1]->noi_curpath] - args[2]->offset;
@h[map[probename], args[0]->ci_remote] = quantize(this->dist);
offset[map[probename], args[0]->ci_remote,
args[1]->noi_curpath] =
args[2]->offset;
}
END {printa("%s\nIP: %s%@d\n", @h);}'
dtrace -qn '
/* This script is useful for observing any system which uses NFS, though it
 * covers NFSv3 only at the moment. The script prints out histograms of
 * important metrics, such as seek distances and whether or not those seeks
 * happen at fixed offsets, which would normally suggest that IO is not
 * random in nature. If the distances are always very close, it is worth
 * looking into whether using larger record sizes on datasets can improve
 * performance, in particular if reads are often concentrated in narrow
 * bands within files. This tool is file based, which is most meaningful
 * here; it does not make sense to compare offsets between different files,
 * since the results would be nonsense.
 *
 * We use long for f[fileid3], because the array stores offsets, which are
 * of type offset3, an alias for uint64.
 */
long f[fileid3]; int lastdist[fileid3];
/* Store file ID and use it to key off of. */
:nfssrv:rfs3_read:entry {
this->fid = (*args[1]).res_u.ok.file_attributes.attr.fileid ;
}
:nfssrv:rfs3_read:entry / f[this->fid] == NULL / {
f[this->fid] = args[0]->offset ;
lastdist[this->fid] = 0 ;
}
:nfssrv:rfs3_read:entry
/ f[this->fid] != NULL && args[0]->offset != f[this->fid] / {
this->prev_off = f[this->fid] ;
this->curr_off = args[0]->offset ;
this->gt = this->curr_off > this->prev_off ;
this->dist = this->gt ? (this->curr_off - this->prev_off) :
-(this->curr_off - this->prev_off) ;
/*
* Uncomment below to troubleshoot, if offsets appear to be wrong.
* printf("0x%x 0x%x dist: 0x%x\n",
* this->prev_off, this->curr_off, this->dist);
*/
@seq["READ"] = quantize(this->dist == lastdist[this->fid]) ;
@d["READ"] = quantize(this->dist >> 10) ;
@gt32["READ"] = quantize(this->dist > 0x8000) ;
@gt64["READ"] = quantize(this->dist > 0x10000) ;
@gt128["READ"] = quantize(this->dist > 0x20000) ;
@gt1m["READ"] = quantize(this->dist > 0x100000) ;
/* Update offset in the map and move on */
f[this->fid] = args[0]->offset ;
lastdist[this->fid] = this->dist ;
}
:nfssrv:rfs3_write:entry {
this->fid = (*args[1]).res_u.ok.file_wcc.after.attr.fileid ;
}
:nfssrv:rfs3_write:entry / f[this->fid] == NULL / {
f[this->fid] = args[0]->offset ;
lastdist[this->fid] = 0 ;
}
:nfssrv:rfs3_write:entry
/ f[this->fid] != NULL && args[0]->offset != f[this->fid] / {
this->prev_off = f[this->fid] ;
this->curr_off = args[0]->offset ;
this->gt = this->curr_off > this->prev_off ;
this->dist = this->gt ? (this->curr_off - this->prev_off) :
-(this->curr_off - this->prev_off) ;
/*
* Uncomment below to troubleshoot, if offsets appear to be wrong.
* printf("0x%x 0x%x dist: 0x%x\n",
* this->prev_off, this->curr_off, this->dist);
*/
@seq["WRITE"] = quantize(this->dist == lastdist[this->fid]) ;
@d["WRITE"] = quantize(this->dist >> 10) ;
@gt32["WRITE"] = quantize(this->dist > 0x8000) ;
@gt64["WRITE"] = quantize(this->dist > 0x10000) ;
@gt128["WRITE"] = quantize(this->dist > 0x20000) ;
@gt1m["WRITE"] = quantize(this->dist > 0x100000) ;
/* Update offset in the map and move on */
f[this->fid] = args[0]->offset ;
lastdist[this->fid] = this->dist ;
}
END {
printa("[%s] Direction:\nSeq == 1 | Random == 0\n%@d\n", @seq) ;
printa("[%s] Seek Dist (KB): %@d\n", @d) ;
printa("[%s] Seek > 32K:\nYes == 1 | No == 0\n%@d\n", @gt32) ;
printa("[%s] Seek > 64K:\nYes == 1 | No == 0\n%@d\n", @gt64) ;
printa("[%s] Seek > 128K:\nYes == 1 | No == 0\n%@d\n", @gt128) ;
printa("[%s] Seek > 1M:\nYes == 1 | No == 0\n%@d\n", @gt1m) ;
}'
dtrace -qn '
BEGIN {
/* Results are printed every 15 seconds, by the tick-15sec probe below. */
/* How stable data is expected to be. */
stable_how[0] = "NOSYNC";
stable_how[1] = "DSYNC";
stable_how[2] = "FSYNC";
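/* These indices follow the NFSv3 stable_how enum from RFC 1813:
 * UNSTABLE = 0, DATA_SYNC = 1, FILE_SYNC = 2. */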
ts = walltimestamp;
}
nfsv3:::op-read-start,
nfsv3:::op-write-start,
nfsv4:::op-read-start {
tm[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
opsz[args[0]->ci_remote, args[1]->noi_xid] = args[2]->count;
flag[args[0]->ci_remote, args[1]->noi_xid] = 0;
self->nfs=1;
}
/* sdt:::arc-miss fires for direct users of the ARC; if we see an arc-miss
 * here, the request was not satisfied from cache and results in actual I/O. */
sdt:::arc-miss /self->nfs/ { self->io = 1; self->nfs = 0 ;}
nfsv3:::op-write-start {
flag[args[0]->ci_remote,args[1]->noi_xid] = args[2]->stable;
}
nfsv4:::op-write-start {
flag[args[0]->ci_remote,args[1]->noi_xid] = args[2]->stable;
tm[args[0]->ci_remote,args[1]->noi_xid] = timestamp;
opsz[args[0]->ci_remote,args[1]->noi_xid] = args[2]->data_len;
}
nfsv3:::op-read-done,
nfsv3:::op-write-done,
nfsv4:::op-read-done,
nfsv4:::op-write-done
/tm[args[0]->ci_remote,args[1]->noi_xid]/
{
/* Total time from start of operation to end of same operation. */
this->delta = (timestamp - tm[args[0]->ci_remote,
args[1]->noi_xid]) / 1000;
/* Flag indicates volatility mode for this operation, i.e.
* `Unstable`, `Data Sync`, or `File Sync`.
*/
this->flag = flag[args[0]->ci_remote,args[1]->noi_xid];
this->file = args[1]->noi_curpath;
this->type = probename == "op-write-done" ? "W" : "R";
/* self->io ? "uncached" : "cached" */
this->type_cache = (probename == "op-write-done") ?
"W": self->io ? "R" : "CR" ;
/* ipaddr=ip[args[1]->noi_xid]; */
this->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->
rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->
rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;
@nfs_fio[ts, this->type, this->file, this->ipaddr, this->port] =
sum(opsz[args[0]->ci_remote, args[1]->noi_xid]);
/*
@nfs_fir["R", this->file, this->ipaddr, this->port] =
sum((this->type == "R" ?
opsz[args[0]->ci_remote, args[1]->noi_xid] : 0));
@nfs_fiw["W", this->file, this->ipaddr, this->port] =
sum((this->type == "W" ?
opsz[args[0]->ci_remote,args[1]->noi_xid] : 0));
*/
/* store size along with max time so the can be correlated */
/* this->overload = ( (this->delta) * (1000*1000*1000) +
opsz[args[0]->ci_remote,args[1]->noi_xid]);
@nfs_mx[this->flag,"R"]=max( (this->type == "R" ? this->overload : 0));
@nfs_mx[this->flag,"W"]=max( (this->type == "W" ? this->overload : 0));
*/
/* Total Completion latency by type of IO and stability flag. */
@nfs_qtclat[stable_how[this->flag],
this->type] = quantize(this->delta);
this->size = opsz[args[0]->ci_remote,args[1]->noi_xid];
this->buck = 512; /* We assume this is smallest possible size */
this->buck = ( this->size > 512 ) ? 1024 : this->buck ;
this->buck = ( this->size > 1024 ) ? 2048 : this->buck ;
this->buck = ( this->size > 2048 ) ? 4096 : this->buck ;
this->buck = ( this->size > 4096 ) ? 8192 : this->buck ;
this->buck = ( this->size > 8192 ) ? 16384 : this->buck ;
this->buck = ( this->size > 16384 ) ? 32768 : this->buck ;
this->buck = ( this->size > 32768 ) ? 65536 : this->buck ;
this->buck = ( this->size > 65536 ) ? 131072 : this->buck ;
this->buck = ( this->size > 131072 ) ? 262144 : this->buck ;
this->buck = ( this->size > 262144 ) ? 524288 : this->buck ;
this->buck = ( this->size > 524288 ) ? 1048576 : this->buck ;
/* this->type is a string, representing a Read, Write or Cache */
@nfs_qtmsz[this->buck, this->type] = quantize(this->delta);
@nfs_avtmsz[this->buck, this->type] = avg(this->delta);
@nfs_cttmsz[this->buck, this->type] = count();
@nfs_ctm[stable_how[this->flag], this->type_cache] = sum(this->delta);
@nfs_ct[stable_how[this->flag], this->type_cache] = count();
@nfs_sz[stable_how[this->flag], this->type_cache] = sum(this->size);
tm[args[0]->ci_remote,args[1]->noi_xid] = 0;
opsz[args[0]->ci_remote,args[1]->noi_xid] = 0;
flag[args[0]->ci_remote,args[1]->noi_xid]=0;
self->io = 0;
}
tick-15sec {
normalize(@nfs_sz, 1024);
printf("\t---- NFS Total Size of Data by IO Type ----\n");
printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
printa("\tStability: %-10s IO Type: %-2s KBytes: %@d\n", @nfs_sz);
printf("\t---- End NFS Total Size of Data by IO Type ----\n\n");
printf("\t---- NFS Total Time by IO Type ----\n");
printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
printa("\tStability: %-10s IO Type: %-2s Time(uS): %@d\n", @nfs_ctm);
printf("\t---- End NFS Total Time by IO Type ----\n\n");
printf("\t---- NFS Data IOs Total by IO Type ----\n");
printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
printa("\tStability: %-10s IO Type: %-2s Count: %@d\n", @nfs_ct);
printf("\t---- End NFS Data IOs Time by IO Type ----\n\n");
printf("\t---- NFS Normalized Average Time ----\n");
printa("\tBucket(bytes): %-7d IO Type: %-2s Latency(uS): %@d\n",
@nfs_avtmsz);
printf("\t---- End NFS Normalized Average Time ----\n\n");
printf("\t---- Completion Latency by Size and IO Type ----\n");
printa("\t[%d] bytes\n\tIO Type: %-2s\n\tLatency(uS): %@d\n",
@nfs_qtmsz);
printf("\t---- End Completion Latency by Size and IO Type ----\n\n");
printf("\t---- Completion Latency Overall ----\n");
printa("\tStability: %-10s\n\tIO Type: %-2s\n\tLatency(uS): %@d\n",
@nfs_qtclat);
printf("\t---- End Latency Overall ----\n\n");
printf("\t---- Count of IOs by Size and IO Type ----\n");
printa("\tBucket(bytes): %-10d IO Type: %-2s %-8@d\n", @nfs_cttmsz);
printf("\t---- End Count of IOs by Size and IO Type ----\n\n");
printf("Timestamp, IO Type, File Path, Client IP Address, Port, Size(bytes)\n");
printa("%-20Y %-2s %s %s %d %@d\n", @nfs_fio);
trunc(@nfs_sz);
trunc(@nfs_avtmsz);
trunc(@nfs_qtmsz);
trunc(@nfs_ctm);
trunc(@nfs_ct);
trunc(@nfs_cttmsz);
trunc(@nfs_qtclat);
trunc(@nfs_fio);
ts = walltimestamp;
}'
dtrace -qn '
/*
* Measure how random or sequential NFSv3 IO is, and bucket it by
* client IP address. It is useful to know which sizes are dominant
 * when IOs keep a consistent offset distance from the previous offset,
 * i.e. IO is sequential, as opposed to which sizes have constantly
 * varying IO distances.
 * Results should look something like this, where counts of both random and
 * sequential IOs are given for the duration of the sample period.
* CMD: Read IP: 172.16.1.34 RAND: 256 SEQ: 228
* CMD: Write IP: 172.16.1.2 RAND: 1379 SEQ: 0
* CMD: Write IP: 172.16.1.34 RAND: 1683 SEQ: 3
*/
int lastdist[string, string, string];
int lastpos[string, string, string];
BEGIN {
map["op-read-start"] = "Read";
map["op-write-start"] = "Write";
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/!lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath]/ {
lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = 0;
lastpos[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = 0;
}
nfsv3:::op-read-start,nfsv3:::op-write-start
/lastpos[map[probename], args[0]->ci_remote,
args[1]->noi_curpath] != args[2]->offset/ {
this->dist = args[2]->offset > lastpos[map[probename],
args[0]->ci_remote, args[1]->noi_curpath] ?
args[2]->offset - lastpos[map[probename],
args[0]->ci_remote, args[1]->noi_curpath] :
lastpos[map[probename], args[0]->ci_remote,
args[1]->noi_curpath] - args[2]->offset;
/* Determine whether the current step matches the previous step, i.e. the IO is sequential. */
this->seq = this->dist ==
lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] ? 1 : 0;
@rand[map[probename], args[0]->ci_remote] =
sum(this->seq == 0 ? 1 : 0);
@seq[map[probename], args[0]->ci_remote] =
sum(this->seq == 1 ? 1 : 0);
lastpos[map[probename], args[0]->ci_remote, args[1]->noi_curpath] =
args[2]->offset;
lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] =
this->dist;
}
END {printa("CMD: %s IP: %s RAND: %@d SEQ: %@d\n", @rand, @seq);}'
dtrace -qn '
int self ts[string, int];
nfsv3:::op-read-start, nfsv3:::op-write-start, nfsv3:::op-commit-start {
self->ts[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
}
nfsv3:::op-read-done, nfsv3:::op-write-done, nfsv3:::op-commit-done
/self->ts[args[0]->ci_remote, args[1]->noi_xid] / {
@t[probefunc] = quantize((timestamp -
self->ts[args[0]->ci_remote, args[1]->noi_xid]) / 1000);
self->ts[args[0]->ci_remote, args[1]->noi_xid] = 0;
}
tick-10sec {
printa("%s\n%@d\n", @t);
trunc(@t);
}'
dtrace -qn '
BEGIN {
printf("timestamp,max.pend.reqs,max.actv.threads,percent.pool.free\n");
}
::svc_xprt_qput:entry {
enq = 1;
this->p_reqs = args[0]->p_reqs;
/* ++this->p_reqs; This may be bumped on entry
into routine, but after the entry probe is triggered.
Further validation of this is necessary.
*/
this->pct_used = (args[0]->p_threads - args[0]->p_asleep) > 0
? 100 * (args[0]->p_threads - args[0]->p_asleep) / args[0]->p_maxthreads
: 1;
this->pct_free = this->pct_used < 100 ? 100 - this->pct_used : 0;
@p_reqs = max(this->p_reqs);
@actv_thr = max(args[0]->p_threads - args[0]->p_asleep);
@pct_free = min(this->pct_free);
}
tick-5sec /enq/ {
printf("%Y,", walltimestamp);
printa("%@d,%@d,%@d\n", @p_reqs, @actv_thr, @pct_free);
trunc(@p_reqs);
trunc(@actv_thr);
trunc(@pct_free);
enq = 0;
}'
dtrace -qn '
self int x[int];
int cnt1;
int cnt2;
BEGIN {
cnt1 = 0; /* Should not be needed, since globals start at zero, */
cnt2 = 0; /* but just in case, extra-careful. */
}
nfsv3:::op-read-start {
self->x[tid] = 1;
ts[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
}
nfsv3:::op-read-done /self->x[tid] > 0/ {
++cnt1;
self->x[tid] = 0;
}
nfsv3:::op-read-done /ts[args[0]->ci_remote, args[1]->noi_xid] > 0/ {
++cnt2;
ts[args[0]->ci_remote, args[1]->noi_xid] = 0;
}
tick-10sec {printf("%d ?= %d\n", cnt1, cnt2); exit(0);}
'
dtrace -qn '
char m[string];
BEGIN {
m["rfs3_read"] = 0x52;
m["rfs3_write"] = 0x57;
}
::rfs3_read:op-read-start {
self->t = timestamp; self->arg2r = args[2]; self->a = args[0]->ci_remote;
@qiosz[m[probefunc], args[0]->ci_remote] = quantize(args[2]->count);
}
::rfs3_write:op-write-start {
self->t = timestamp; self->arg2w = args[2]; self->a = args[0]->ci_remote;
@qiosz[m[probefunc], args[0]->ci_remote] = quantize(args[2]->count);
}
::rfs3_read:op-read-done /self->t/ {
this->delta = (timestamp - self->t) / 1000;
this->b = self->arg2r->count;
@q[m[probefunc], self->a] = quantize(this->delta);
@av[m[probefunc], self->a] = avg(this->delta);
/* Track average IO size in bytes so END can report it alongside the histogram. */
@avsz[m[probefunc], self->a] = avg(this->b);
self->a = 0; self->t = 0; self->arg2r = 0;
}
::rfs3_write:op-write-done /self->t/ {
this->delta = (timestamp - self->t) / 1000;
this->b = self->arg2w->data.data_len;
@q[m[probefunc], self->a] = quantize(this->delta);
@av[m[probefunc], self->a] = avg(this->delta);
@avsz[m[probefunc], self->a] = avg(this->b);
self->a = 0; self->t = 0; self->arg2w = 0;
}
END {
printa("\t %c - IO size(b) client: %s avg: %@d(b)%@d\n", @av, @qiosz);
printa("\t %c - latency(μS) client: %s avg: %@d(μS)%@d\n", @av, @q);
}'