szaydel
4/22/2015 - 10:29 PM

Dtrace snippets to observe NFSv3 and NFSv4 performance

[NFS Observation dtrace] Various performance and metrics measurements, such as io sizes, counts, IOPS and checks for NFSv3/4 #tags: nfs, nfsv3, nfsv4, offsets, file offsets, random, sequential, dtrace, illumos, iosize, nfs IOPS

#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2015 RackTop Systems.
 *
 * nfsv3rwbyfilecsv.d
 * This script traces NFSv3 Read and Write IO, reporting each file touched by
 * name if known and kilobytes/sec for IO on this file, averaged over
 * `interval` seconds long sample and count of read and write operations done.
 * We cannot tell from this the origin of request, but we can tell what was
 * accessed and how much work was done.
 *
 * Results should look similar to the following:
 * [timestamp],[command],[path],[rkb/s],[reads],[wkb/s],[writes]
 *
 * 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
 * 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
 * 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
 * 2015 Oct 13 09:49:24,WRITE,/storage/p01/x/.iormstats.sf,0,0,0,0
 */
BEGIN {
  printf("%s,%s,%s,%s,%s,%s,%s\n", "timestamp", "cmd", "pathname",
  "read.kb/sec","read.count", "write.kb/sec", "write.count");
  interval = 5;
  command["op-read-done"] = "READ";
  command["op-write-done"] = "WRITE";
  ts = walltimestamp;
  tick = interval;
}

nfsv3:::op-read-done /ts/ {
  this->bytes = args[2]->res_u.ok.data.data_len;
  @readCnt[ts, command[probename], 
      args[1]->noi_curpath] = count();
  @reads[ts, command[probename], 
      args[1]->noi_curpath] = sum(this->bytes);
}

nfsv3:::op-write-done /ts/ {
  this->bytes = args[2]->res_u.ok.count;
  @writeCnt[ts, command[probename], 
      args[1]->noi_curpath] = count();
  @writes[ts, command[probename], 
      args[1]->noi_curpath] = sum(this->bytes);
}

profile:::tick-1sec, END
/tick-- == 0/ {
  /* Normalize data to report kilobytes/sec for each file. */
  normalize(@reads, 1024 * interval);
  normalize(@writes, 1024 * interval);  
  normalize(@readCnt, interval);
  normalize(@writeCnt, interval);
  printa("%Y,%s,%s,%@d,%@d,%@d,%@d\n", @reads, @readCnt, @writes, @writeCnt);
  trunc(@reads);
  trunc(@readCnt);
  trunc(@writes);
  trunc(@writeCnt);
  tick = interval;
  ts = walltimestamp;
}
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2015 RackTop Systems.
 *
 * nfsv3iobyconncsv.d
 * This script displays amount of NFSv3 Read and Write IO, number of
 * operations performed including reads and writes, and any metadata
 * operations, number of reads and write explicitly, and sizes in KB.,
 * by client connection, i.e. by unique IP address.
 */

dtrace:::BEGIN
{
    ts = walltimestamp;
    interval = 5;
    /* printf("Tracing... Interval %d secs.\n", interval); */
    printf("%s,%s,%s,%s,%s,%s\n", "client.ip.addr", "reads/sec", 
    "writes/s", "kbr/s", "kbw/s", "metadata.ops/s");
    tick = interval;
}

/* Metadata Operations not counted towards R/W IOs */
nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
/ts/ {
    @ops[ts, args[0]->ci_remote] = count();
    @ops[ts, "ALL_CLIENTS"] = count();
}

nfsv3:::op-read-done
/ts/ {
    @reads[ts, args[0]->ci_remote] = count();
    @readbytes[ts, args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len);
    @reads[ts, "ALL_CLIENTS"] = count();
    @readbytes[ts, "ALL_CLIENTS"] = sum(args[2]->res_u.ok.data.data_len);
}

nfsv3:::op-write-done
/ts/ {
    @writes[ts, args[0]->ci_remote] = count();
    @writebytes[ts, args[0]->ci_remote] = sum(args[2]->res_u.ok.count);
    @writes[ts, "ALL_CLIENTS"] = count();
    @writebytes[ts, "ALL_CLIENTS"] = sum(args[2]->res_u.ok.count);
}

profile:::tick-1sec,END
/tick-- == 0/
{
    normalize(@ops, interval);
    normalize(@reads, interval);
    normalize(@writes, interval);
    normalize(@writebytes, 1024 * interval);
    normalize(@readbytes, 1024 * interval);

    printa("%Y,%s,%@d,%@d,%@d,%@d,%@d\n", @reads, @writes,
        @readbytes, @writebytes, @ops);
    trunc(@ops);
    trunc(@reads);
    trunc(@writes);
    trunc(@readbytes);
    trunc(@writebytes);
    tick = interval;
    ts = walltimestamp;
}
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2016 RackTop Systems.
 *
 * nfsv3iobyconncsv.d
 * This script displays amount of NFSv3 Read and Write IO, number of
 * operations performed including reads and writes, and any metadata
 * operations, number of reads and writes explicitly, and sizes in KB.,
 * by client connection, i.e. by unique IP address and for ALL_CLIENTS.
 */
 inline int INTERVAL = 5;

dtrace:::BEGIN
{
    printf("%s,%s,%s,%s,%s,%s,%s\n", "client.ip.addr", "timedelta", 
            "reads/sec", "writes/s", "kbr/s", "kbw/s", "metadata.ops/s");
    tick = INTERVAL;
    start_time = timestamp;
    ts = 0;
}

/* Metadata Operations not counted towards R/W IOs */
nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
{
    @ops[args[0]->ci_remote] = count();
    @ops["ALL_CLIENTS"] = count();
    @t[args[0]->ci_remote] = max(ts);
    @t["ALL_CLIENTS"] = max(ts);
}

nfsv3:::op-read-done
{
    @reads[args[0]->ci_remote] = count();
    @readbytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.data.data_len);
    @reads["ALL_CLIENTS"] = count();
    @readbytes["ALL_CLIENTS"] = sum(args[2]->res_u.ok.data.data_len);
    @t[args[0]->ci_remote] = max(ts);
    @t["ALL_CLIENTS"] = max(ts);
}

nfsv3:::op-write-done
{
    @writes[args[0]->ci_remote] = count();
    @writebytes[args[0]->ci_remote] = sum(args[2]->res_u.ok.count);
    @writes["ALL_CLIENTS"] = count();
    @writebytes["ALL_CLIENTS"] = sum(args[2]->res_u.ok.count);
    @t[args[0]->ci_remote] = max(ts);
    @t["ALL_CLIENTS"] = max(ts);
}

profile:::tick-1sec,END
/tick-- == 0/
{
    normalize(@ops, INTERVAL);
    normalize(@reads, INTERVAL);
    normalize(@writes, INTERVAL);
    normalize(@writebytes, 1024 * INTERVAL);
    normalize(@readbytes, 1024 * INTERVAL);

    printa("%s,%@d,%@d,%@d,%@d,%@d,%@d\n", @t, @reads, @writes,
        @readbytes, @writebytes, @ops);
    tick = INTERVAL;
    ts = (timestamp - start_time) / 1000000000;
    trunc(@ops);
    trunc(@reads);
    trunc(@writes);
    trunc(@readbytes);
    trunc(@writebytes);
    trunc(@t);
}
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2015 RackTop Systems.
 * 
 * nfsv3cmds.d
 * This script displays amount of NFSv3 IO and all operations, and their
 * count, by client connection, i.e. by unique IP address.
 */

dtrace:::BEGIN
{
    interval = 10;
    tick = interval;
    /* All known NFSv3 commands */
    commands["rfs3_commit"] = "COMMIT";
    commands["rfs3_pathconf"] = "PATHCONF";
    commands["rfs3_fsinfo"] = "FSINFO";
    commands["rfs3_fsstat"] = "FSSTAT";
    commands["rfs3_readdirplus"] = "READ_DIR+";
    commands["rfs3_readdir"] = "READ_DIR";
    commands["rfs3_link"] = "LINK";
    commands["rfs3_rename"] = "RENAME";
    commands["rfs3_rmdir"] = "RMDIR";
    commands["rfs3_remove"] = "REMOVE";
    commands["rfs3_mknod"] = "MKNODE";
    commands["rfs3_symlink"] = "SYMLINK";
    commands["rfs3_mkdir"] = "MKDIR";
    commands["rfs3_create"] = "CREATE";
    commands["rfs3_write"] = "WRITE";
    commands["rfs3_read"] = "READ";
    commands["rfs3_readlink"] = "READ_LINK";
    commands["rfs3_access"] = "ACCESS";
    commands["rfs3_lookup"] = "LOOKUP";
    commands["rfs3_setattr"] = "SET_ATTR";
    commands["rfs3_getattr"] = "GET_ATTR";
    commands["rpc_null_v3"] = "NULL";
}

nfsv3:::op-commit-done,
nfsv3:::op-pathconf-done,
nfsv3:::op-fsinfo-done,
nfsv3:::op-fsstat-done,
nfsv3:::op-readdirplus-done,
nfsv3:::op-readdir-done,
nfsv3:::op-link-done,
nfsv3:::op-rename-done,
nfsv3:::op-rmdir-done,
nfsv3:::op-remove-done,
nfsv3:::op-mknod-done,
nfsv3:::op-symlink-done,
nfsv3:::op-mkdir-done,
nfsv3:::op-create-done,
nfsv3:::op-write-done,
nfsv3:::op-read-done,
nfsv3:::op-readlink-done,
nfsv3:::op-access-done,
nfsv3:::op-lookup-done,
nfsv3:::op-setattr-done,
nfsv3:::op-getattr-done,
nfsv3:::op-null-done
{
    @ops[args[0]->ci_remote, commands[probefunc]] = count();
}

nfsv3:::op-read-done
{
    @readbytes[args[0]->ci_remote, 
        args[2]->res_u.ok.count] = sum(args[2]->res_u.ok.data.data_len);
}


nfsv3:::op-write-done
{
    @writebytes[args[0]->ci_remote, 
        args[2]->res_u.ok.count] = sum(args[2]->res_u.ok.count);
}

profile:::tick-1sec
/tick-- == 0/
{
    this->ts = walltimestamp;
    printf("---- Begin IO Byte Counts [%Y] ----\n", this->ts);
    printa("Client[R]: %s IO Size: %d Total bytes: %@d\n", @readbytes);
    printa("Client[W]: %s IO Size: %d Total bytes: %@d\n", @writebytes);
    printf("---- END Byte Counts ----\n\n");
    printf("\n---- Begin Operations [%Y] ----\n", this->ts);
    printa("Client: %s CMD: %s OP.Count: %@d\n", @ops);
    printf("---- END Operations ----\n\n");
    trunc(@readbytes);
    trunc(@writebytes);
    tick = interval;
}
dtrace -qn '
  /*
   * Register when a request is received from client 
   */ 
  nfsv3:::op-read-start,nfsv3:::op-write-start {
    self->x[args[1]->noi_xid] = timestamp;
    self->caddr[args[1]->noi_xid] = args[0]->ci_remote;
  }
  /*
   * Register when client acknowledges completion of operation
   */
  nfsv3:::op-read-done,nfsv3:::op-write-done /self->x[args[1]->noi_xid]/ {
    this->delta = timestamp - self->x[args[1]->noi_xid];
    @[self->caddr[args[1]->noi_xid]] = quantize(this->delta / 1000);
    self->x[args[1]->noi_xid] = 0;
    self->caddr[args[1]->noi_xid] = 0;
  }'

// Same as above, but splits commands into two histograms, one for reads and one for writes.
dtrace -qn '
string op[string];
BEGIN {
  op["op-read-done"] = "R";
  op["op-write-done"] = "W";
}
  /*
   * Register when a request is received from client 
   */ 
  nfsv3:::op-read-start,nfsv3:::op-write-start {
    self->x[args[1]->noi_xid] = timestamp;
    self->caddr[args[1]->noi_xid] = args[0]->ci_remote;
  }
  /*
   * Register when client acknowledges completion of operation
   */
  nfsv3:::op-read-done,nfsv3:::op-write-done /self->x[args[1]->noi_xid]/ {
    this->delta = timestamp - self->x[args[1]->noi_xid];
    @[op[probename], self->caddr[args[1]->noi_xid]] = quantize(this->delta / 1000);
    self->x[args[1]->noi_xid] = 0;
    self->caddr[args[1]->noi_xid] = 0;
  }'
dtrace -qCs <( print -r '
/*
 * Script is meant to very produce buckets of IOsize(s),
 * by size of IO, grouped by Random and Sequential.
 * Four histograms are generated, two for Read and Write command.
 * One histogram being all sequential IOs, and one being
 * all non-sequential IOs.
 */
#include <nfs/nfs.h>
  offset3 last_off[string];
  count3  last_count[string];
  int ok_r, ok_w;
  
  ::rfs3_read:entry /last_count["read"] == 0/ {
    last_off["read"] = args[0]->offset ;
    last_count["read"] = args[0]->count ;
    ok_r = 1;
  }
  ::rfs3_read:entry /ok_r/ {
    this->offset = args[0]->offset ;
    @rd[ last_off["read"] + last_count["read"] == this->offset ?
      "SEQ" : "RAND"] = quantize(args[0]->count);
    /* Update last offset and count with latest value. */
    last_off["read"] = this->offset ;
    last_count["read"] = args[0]->count ;
  }
  ::rfs3_write:entry /last_count["write"] == 0/ {
    last_off["write"] = args[0]->offset ;
    last_count["write"] = args[0]->count ;
    ok_w = 1;
  }
  ::rfs3_write:entry /ok_w/ {
    this->offset = args[0]->offset ;
    @wr[ last_off["write"] + last_count["write"] == this->offset ?
      "SEQ" : "RAND"] = quantize(args[0]->count);
    /* Update last offset and count with latest value. */
    last_off["write"] = this->offset ;
    last_count["write"] = args[0]->count ;
  }
  END {
    printa("\t[%s] Read(bytes): %@d\n", @rd);
    printa("\t[%s] Write(bytes): %@d\n", @wr);
  }'
)
/usr/sbin/dtrace -qn'
  #pragma D option aggsize=16m
  #pragma D option bufsize=8m
  #pragma D option dynvarsize=8m

  string typ[string];
  
  BEGIN 
  {
    printf("source,cmd,path,IOs,avg.lat.ns,bytes\n");
    typ["op-access-done"]      = "access";
    typ["op-commit-done"]      = "commit";
    typ["op-create-done"]      = "create";
    typ["op-fsinfo-done"]      = "fsinfo";
    typ["op-fsstat-done"]      = "fsstat";
    typ["op-getattr-done"]     = "getattr";
    typ["op-link-done"]        = "link";
    typ["op-lookup-done"]      = "lookup";
    typ["op-mkdir-done"]       = "mkdir";
    typ["op-mknod-done"]       = "mknod";
    typ["op-null-done"]        = "null";
    typ["op-pathconf-done"]    = "pathconf";
    typ["op-read-done"]        = "read";
    typ["op-readdir-done"]     = "readdir";
    typ["op-readdirplus-done"] = "readdirplus";
    typ["op-readlink-done"]    = "readlink";
    typ["op-remove-done"]      = "remove";
    typ["op-rename-done"]      = "rename";
    typ["op-rmdir-done"]       = "rmdir";
    typ["op-setattr-done"]     = "setattr";
    typ["op-symlink-done"]     = "symlink";
    typ["op-write-done"]       = "write";
  }
   
  nfsv3:::op-access-start,
  nfsv3:::op-commit-start,
  nfsv3:::op-create-start,
  nfsv3:::op-fsinfo-start,
  nfsv3:::op-fsstat-start,
  nfsv3:::op-getattr-start,
  nfsv3:::op-link-start,
  nfsv3:::op-lookup-start,
  nfsv3:::op-mkdir-start,
  nfsv3:::op-mknod-start,
  nfsv3:::op-null-start,
  nfsv3:::op-pathconf-start,
  nfsv3:::op-read-start,
  nfsv3:::op-readdir-start,
  nfsv3:::op-readdirplus-start,
  nfsv3:::op-readlink-start,
  nfsv3:::op-remove-start,
  nfsv3:::op-rename-start,
  nfsv3:::op-rmdir-start,
  nfsv3:::op-setattr-start,
  nfsv3:::op-symlink-start,
  nfsv3:::op-write-start
  {
    self->src_addr = args[0]->ci_remote;
    tm[self->src_addr, args[1]->noi_xid] = timestamp;

    self->have_path = args[1]->noi_curpath == NULL ? 0 : 1;
    
    /* We should make sure we handle empty value as well */
    self->path = args[1]->noi_curpath != "" ? 
              args[1]->noi_curpath  : "NOPATH";
  }

  nfsv3:::op-create-start,
  nfsv3:::op-mkdir-start,
  nfsv3:::op-mknod-start
  / self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
  {
    self->path = strjoin(strjoin(self->path, "/"), args[2]->where.name)
  }
  
  nfsv3:::op-lookup-start
  / self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
  {
    self->path = strjoin(strjoin(self->path, "/"), args[2]->what.name)
  }
  
  nfsv3:::op-remove-start,
  nfsv3:::op-rmdir-start
  / self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
  {
    self->path = strjoin(strjoin(self->path, "/"), args[2]->object.name)
  }
  
  nfsv3:::op-rename-start
  / self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
  {
    self->path = strjoin(strjoin(self->path, "/"), args[2]->from.name)
  }
  
  nfsv3:::op-symlink-start
  / self->have_path && self->path != "NOPATH" && self->path != "<unknown>" /
  {
    self->path = strjoin(strjoin(self->path, "/"), args[2]->symlink.symlink_data)
  }
  
  nfsv3:::op-read-done,
  nfsv3:::op-write-done
  / self->have_path && tm[self->src_addr, args[1]->noi_xid] /
  {
    self->bytes = args[2]->res_u.ok.count ;

    @ios[self->src_addr, typ[probename], self->path]    = count();
    @lat_us[self->src_addr, typ[probename], self->path] = 
          avg(timestamp - tm[self->src_addr, args[1]->noi_xid]);
    @s[self->src_addr, typ[probename], self->path]      = sum(self->bytes);
    ++data;
  }

  nfsv3:::op-access-done,
  nfsv3:::op-commit-done,
  nfsv3:::op-create-done,
  nfsv3:::op-fsinfo-done,
  nfsv3:::op-fsstat-done,
  nfsv3:::op-getattr-done,
  nfsv3:::op-link-done,
  nfsv3:::op-lookup-done,
  nfsv3:::op-mkdir-done,
  nfsv3:::op-mknod-done,
  nfsv3:::op-null-done,
  nfsv3:::op-pathconf-done,
  nfsv3:::op-readdir-done,
  nfsv3:::op-readdirplus-done,
  nfsv3:::op-readlink-done,
  nfsv3:::op-remove-done,
  nfsv3:::op-rename-done,
  nfsv3:::op-rmdir-done,
  nfsv3:::op-setattr-done,
  nfsv3:::op-symlink-done
  / self->have_path && tm[self->src_addr, args[1]->noi_xid] /
  {
    @ios[self->src_addr, typ[probename], self->path]    = count();
    @lat_us[self->src_addr, typ[probename], self->path] =
      avg(timestamp - tm[self->src_addr, args[1]->noi_xid]);
    @s[self->src_addr, typ[probename], self->path]      = sum(0);
    ++data;
  }
  
  nfsv3:::op-access-done,
  nfsv3:::op-commit-done,
  nfsv3:::op-create-done,
  nfsv3:::op-fsinfo-done,
  nfsv3:::op-fsstat-done,
  nfsv3:::op-getattr-done,
  nfsv3:::op-link-done,
  nfsv3:::op-lookup-done,
  nfsv3:::op-mkdir-done,
  nfsv3:::op-mknod-done,
  nfsv3:::op-null-done,
  nfsv3:::op-pathconf-done,
  nfsv3:::op-read-done,
  nfsv3:::op-readdir-done,
  nfsv3:::op-readdirplus-done,
  nfsv3:::op-readlink-done,
  nfsv3:::op-remove-done,
  nfsv3:::op-rename-done,
  nfsv3:::op-rmdir-done,
  nfsv3:::op-setattr-done,
  nfsv3:::op-symlink-done,
  nfsv3:::op-write-done
  {
    /* Zero out things to make sure we avoid issues with dynvar drops */
    self->bytes = 0;
    self->have_path = 0;
    tm[self->src_addr, args[1]->noi_xid] = 0;
    self->src_addr = 0;
    self->path = 0;
  }

  tick-1sec /data/
  {
    /*
     * Shrink each aggregation to 100 observations first, then print those
     * 100 observations, which will not mean we are limited to 100 unique
     * connection addresses, or 100 unique paths. As it stands, aggregations
     * right now use a tuple for their key, which is a set of three values:
     * client address, command, path. This means that in theory we could have:
     * (client address) x (command) x (path) number of unique keys.
     */
    trunc(@ios, 100); trunc(@lat_us, 100); trunc(@s, 100);
    /* normalize(@lat_us, data); */
    
    printa("%s,%s,%s,%@d,%@d,%@d\n", @ios, @lat_us, @s);
    trunc(@ios); trunc(@lat_us); trunc(@s);
    data = 0;
  }'
dtrace -qn '
    BEGIN {
        x["op-read-start"] = "Read" ;
        x["op-write-start"] = "Write" ;
    }
    nfsv3:::op-read-start { self->offs = args[2]->offset ; self->sz = args[2]->count ; }
    nfsv3:::op-write-start { self->offs = args[2]->offset ; self->sz = args[2]->data.data_len ; }

    nfsv3:::op-read-start,nfsv3:::op-write-start
    /self->sz <= 0x1000 && self->offs/ {
        @[x[probename], "4K"] = quantize(self->offs);
        self->offs = 0 ; self->sz = 0 ;
    }
    nfsv3:::op-read-start,nfsv3:::op-write-start 
    /self->sz > 0x1000 && self->sz <= 0x2000 && self->offs/ {
        @[x[probename], "8K"] = quantize(self->offs);
        self->offs = 0 ; self->sz = 0 ;
    }
    nfsv3:::op-read-start,nfsv3:::op-write-start
    /self->sz > 0x2000 && self->sz <= 0x4000 && self->offs/ {
        @[x[probename], "16K"] = quantize(self->offs);
        self->offs = 0 ; self->sz = 0 ;
    }
    nfsv3:::op-read-start,nfsv3:::op-write-start
    /self->sz > 0x4000 && self->sz <= 0x8000 && self->offs/ {
        @[x[probename], "32K"] = quantize(self->offs);
        self->offs = 0 ; self->sz = 0 ;
    }
    nfsv3:::op-read-start,nfsv3:::op-write-start
    /self->sz > 0x8000 && self->offs/ {
        @[x[probename], ">32K"] = quantize(self->offs);
        self->offs = 0 ; self->sz = 0 ;
    }
    END {
    printa("[%s] IOsize(%s): %@d\n", @);
    }'
dtrace -qn '
  long ios_per_sec[string];
  long prev_timestamp[string];
  inline const int NSEC_PER_SEC = 1000000000;
  BEGIN {
      start = timestamp ;
      prev_timestamp["op-read-start"] = start;
      prev_timestamp["op-write-start"] = start;
  }
  nfsv3:::op-read-start {
    self->sz = args[2]->count;
    self->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
    this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;
  }
  nfsv3:::op-write-start {
    self->sz = args[2]->data.data_len;
    self->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
    this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /self->sz && self->ipaddr != NULL/ {
    this->dir = probename == "op-read-start" ? "READ" : "WRITE" ;
    @bukit[self->ipaddr, this->dir] = quantize(self->sz);
    @tot[self->ipaddr, this->dir]   = sum(self->sz);
    @avsz[self->ipaddr, this->dir]  = avg(self->sz);
    @minsz[self->ipaddr, this->dir] = min(self->sz);
    @maxsz[self->ipaddr, this->dir] = max(self->sz);
    @ct[self->ipaddr, this->dir]    = count();
    self->sz = 0;
    self->ipaddr = 0;
    ioct[this->dir]++ ;
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /timestamp >= (prev_timestamp[probename] + 1000000000) / {
    this->now = timestamp ;
    this->delta = this->now - prev_timestamp[probename];
    this->dir = probename == "op-read-start" ? "READ" : "WRITE" ;
    this->perSec = (ios_per_sec[this->dir] * NSEC_PER_SEC) / this->delta ;
    prev_timestamp[probename] = this->now ; /* update timestamp for next run */
    @ios_dist[this->dir, "Second"] = quantize(ioct[this->dir]);
    ioct[this->dir] = 0; /* reset counter for next second */
  }
  END {
    this->stop = timestamp;
    normalize(@tot, 1024*1024);
    normalize(@maxsz, 1024);
    normalize(@avsz, 1024);
    printa("\t   IOsize(b)\t\tClient=%s Op=%s%@d\n", @bukit);
    printf("\n----------------------------------------\n\n");
    printa("           %-5s IOs per %s%@d\n", @ios_dist);
    printf("----------------------------------------\n\n");
    printa("%-15s %s total(MB)=%@d min(b)=%@d max(KB)=%@d avg(KB)=%@d issued=%@d \n", 
           @tot, @minsz, @maxsz, @avsz, @ct);
    printf("--------------------\nTotal Runtime: %d seconds\n",
            (this->stop - start)/ NSEC_PER_SEC);
  }'
// NFSv3 Read/Write offsets by type of IO (R/W), quantized.
dtrace -qn '
  BEGIN {
    op["op-read-start"] = "READ Offset(KB)"; 
    op["op-write-start"] = "WRITE Offset(KB)"; 
    printf("Tracing file offsets... Ctrl-C to End\n");
  }
  nfsv3:::op-read-start,
  nfsv3:::op-write-start {
    @[op[probename]] = quantize(args[2]->offset >> 10)
  }
  END {
  printa(@);
  }'
#!/usr/bin/ksh
#
# show affect of alignment
#
### default variables
opt_blocksize=4096  # blocksize for alignment measurements

### process options
while getopts b:h name
do
    case $name in
        b)  opt_blocksize=$OPTARG ;;
	h)  cat <<END >&2
USAGE: nfsalignment [-b blocksize]
    -b blocksize # alignment blocksize (default=${opt_blocksize})
END
        exit 1
    esac
done

dtrace -qn '
inline int OPT_blocksize    = '$opt_blocksize';
inline int READ_ALIGNED     = 0;
inline int WRITE_ALIGNED    = 1;
inline int READ_UNALIGNED   = 2;
inline int WRITE_UNALIGNED  = 3;

int self align_cnt[int];

dtrace:::BEGIN
{
    printf("tracing... press [ctrl]+C when done\n");
    rw["op-read-start"]     = "R";
    rw["op-write-start"]    = "W"; 
}
nfsv3:::op-read-start,
nfsv3:::op-write-start,
nfsv4:::op-read-start,
nfsv4:::op-write-start
{
    this->ts=timestamp;
    this->align=args[2]->offset % OPT_blocksize;
    /* todo: option for offset normalization */
    /* this->align=this->align > 2048 ? this->align=4096 - this->align
        : this->align;
    */
}
nfsv3:::op-read-start,
nfsv4:::op-read-start
{
    this->aligned = args[2]->offset % OPT_blocksize == 0 ? 1 : 0;
    this->unaligned = args[2]->offset % OPT_blocksize != 0 ? 1 : 0;
    self->align_cnt[READ_ALIGNED]     += this->aligned;
    self->align_cnt[READ_UNALIGNED]   += this->unaligned;
}
nfsv3:::op-write-start,
nfsv4:::op-write-start
{
    this->aligned = args[2]->offset % OPT_blocksize == 0 ? 1 : 0;
    this->unaligned = args[2]->offset % OPT_blocksize != 0 ? 1 : 0;
    self->align_cnt[WRITE_ALIGNED]     += this->aligned;
    self->align_cnt[WRITE_UNALIGNED]   += this->unaligned;
}
/* direction of I/O */
nfsv3:::op-read-start,
nfsv4:::op-read-start
nfsv3:::op-write-start,
nfsv4:::op-write-start
{
    this->rw = rw[probename];
    this->fh = args[1]->noi_curpath;
}
/* version of I/O */
nfsv3:::op-read-start,
nfsv3:::op-write-start
{
    this->vers="3";
}
nfsv4:::op-read-start,
nfsv4:::op-write-start
{
    this->vers="4";
}
nfsv3:::op-read-done,
nfsv3:::op-write-done,
nfsv4:::op-read-done,
nfsv4:::op-write-done
/this->ts && this->rw != NULL/
{
    @t[this->rw, this->vers, this->align, this->fh] =
        avg((timestamp - this->ts)/1000);
    @c[this->rw, this->vers, this->align, this->fh] = count();
    this->ts=0;

    /* Aligned IOs Count */
    @a[this->rw, this->vers] = sum(this->rw == "R" ?    
        self->align_cnt[READ_ALIGNED] : 
            self->align_cnt[WRITE_ALIGNED]);

    /* Unaligned IOs Count */
    @u[this->rw, this->vers] = sum(this->rw == "R" ? 
        self->align_cnt[READ_UNALIGNED] :
            self->align_cnt[WRITE_UNALIGNED]);

    self->align_cnt[READ_ALIGNED]       = 0;
    self->align_cnt[READ_UNALIGNED]     = 0;
    self->align_cnt[WRITE_ALIGNED]      = 0;
    self->align_cnt[WRITE_UNALIGNED]    = 0;
}
dtrace:::END
{
    printf("NFS alignment analysis for blocksize=%d\n", OPT_blocksize);
    printf("Sample end time %Y\n", walltimestamp);
    /* Report on total aligned and unaligned I/Os */
    printa("(%s) Count I/Os Aligned   NFSv%s => %@d\n", @a);
    printa("(%s) Count I/Os Unaligned NFSv%s => %@d\n", @u);
    printf("%8s %8s %20s %10s %20s %20s\n",
        "r/w", "version", "misalignment (bytes)","Count", "Avg_t (us)", "Path");
    printa("%8s %8s %20d %10@d %20@d                 %20s\n", @c, @t);
    
}'
dtrace -qn '
  BEGIN {
    x["op-read-start"] = "Read" ;
    x["op-write-start"] = "Write" ;
  }
  nfsv3:::op-read-start { self->sz = args[2]->count }
  nfsv3:::op-write-start { self->sz = args[2]->data.data_len }
  nfsv3:::op-read-start,nfsv3:::op-write-start /self->sz/ {
    @[x[probename]] = quantize(self->sz);
    self->sz = 0;
  }
  END {
    printa("[%s] IOsize(b): %@d\n", @);
  }'
// Observe by name of file a distribution of offsets within a file beeing seeked.
// This is useful for observing VMs. Offset distance is in bytes from start of file.
dtrace -qn 'nfsv3:::op-read-start,nfsv3:::op-write-start {
	@[args[1]->noi_curpath] = quantize(args[2]->offset >> 10)
	}'

// Observe offsets withing a file being seeked by iosize. 
// This is useful when a load generation tool is creating a known iosize.
dtrace -qn '
	BEGIN {
	    m["op-read-start"] = "R" ;
	    m["op-write-start"] = "W" ;
	}
    nfsv3:::op-read-start {self->sz = args[2]->count}
    nfsv3:::op-write-start {self->sz = args[2]->data.data_len}
    nfsv3:::op-read-start,nfsv3:::op-write-start /self->sz/ {
        /* @[args[1]->noi_curpath] = quantize(args[2]->offset >> 10); */
        @[m[probename], self->sz] = quantize(args[2]->offset >> 10);
        self->sz = 0;
    }
    END {
    	trunc(@, 5);
    	printa("[%s] IOsize: %d bytes\nOffset Distance(KB):%@d\n", @);
    }'
dtrace -qn '
    #pragma D option aggsize=16m
    #pragma D option dynvarsize=38m

    int self lastR[string];
    int self lastW[string];
    
    BEGIN {
            cmd["op-read-start"] = "READ";
            cmd["op-write-start"] = "WRITE";
            cmd["op-read-done"] = "READ";
            cmd["op-write-done"] = "WRITE";
            reads = 0;
            writes = 0;
            printf("timestamp,cmd,path,iosize,avg.offset.kb,min.offset.kb,max.offset.kb,elapsed.us\n");
    }

    tick-1sec {
        ts = walltimestamp;
    }
    /* 
     * The distance distance value we store in this->distance variable
     * is number of bytes that that we skipped from previous offset,
     * plus number of bytes read, or written from previous offset.
     * If last offset was say 2048 with 1KB IO size, we will be measuring
     * jump distance from (2048 + 1024) - new offset. Jump distance is 
     * not strictly distance between two consecutive offsets, but a
     * distance between last actual position of cursor as a result of 
     * previous read or write.
     */
    nfsv3:::op-read-start /ts/ {
        this->len = args[2]->count; /* how much to read */
        this->distance = self->lastW[args[1]->noi_curpath] == NULL ? args[2]->offset : 
            self->lastR[args[1]->noi_curpath] > args[2]->offset ?
            self->lastR[args[1]->noi_curpath] - args[2]->offset :
                -self->lastR[args[1]->noi_curpath] + args[2]->offset;

        /* Preserve current offset, after this->len bytes read */
        self->lastR[args[1]->noi_curpath] = args[2]->offset + this->len;
        
        @avgjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  =     
            avg(this->distance);
        @minjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  =     
            min(this->distance);
        @maxjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  = 
            max(this->distance);
        /* @sdjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  = stddev(this->distance); */
        /* this->len = 0; */
        this->distance = 0;
        this->relt = timestamp;
    }

    nfsv3:::op-write-start /ts/ {
        this->len = args[2]->data.data_len; /* how much to write */
        this->distance = self->lastW[args[1]->noi_curpath] == NULL ? args[2]->offset : 
            self->lastW[args[1]->noi_curpath] > args[2]->offset ?
            self->lastW[args[1]->noi_curpath] - args[2]->offset :
                -self->lastW[args[1]->noi_curpath] + args[2]->offset;

        /* Preserve current offset, after this->len bytes written */
        self->lastW[args[1]->noi_curpath] = args[2]->offset + this->len;
        
        @avgjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  = 
            avg(this->distance);
        @minjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  = 
            min(this->distance);
        @maxjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]  = 
            max(this->distance);
        /* @sdjmp[ts, cmd[probename], args[1]->noi_curpath, this->len]   = stddev(this->distance); */
        /* this->len = 0; */
        this->distance = 0;
        this->relt = timestamp;
    }
    
    nfsv3:::op-read-done,nfsv3:::op-write-done /this->relt/ {
        /* Measure average amount of time from start of request to finish. */
        @avglat[ts, cmd[probename], args[1]->noi_curpath, this->len]  = 
            avg((timestamp - this->relt) / 1000);
        
        this->relt = 0;
        this->len = 0;
    }

    tick-1sec {
        normalize(@avgjmp, 1024);
        normalize(@minjmp, 1024);
        normalize(@maxjmp, 1024);
        printa("%Y,%s,%s,%d,%@d,%@d,%@d,%@d\n", 
                @avgjmp, @minjmp, @maxjmp, @avglat); 
        trunc(@minjmp); trunc(@maxjmp); 
        trunc(@avgjmp); trunc(@avglat);
        /* trunc(@sdjmp); */
    }'
dtrace -qn '
  /*
   * Measure how random or sequential NFSv3 IO is, and bucket it by
   * client IP address. It is useful to know which sizes are dominant
   * when IOs have consistent offset distance from previous offset,
   * i.e. io is sequential, as opposed to what sizes have constantly
   * varied IO distances.
   */
  long offset[string, string, string];

  BEGIN {
    map["op-read-start"] = "Read";
    map["op-write-start"] = "Write";
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /!offset[map[probename], args[0]->ci_remote, args[1]->noi_curpath]/ {
    offset[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = args[2]->offset;
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /offset[map[probename], args[0]->ci_remote,
  args[1]->noi_curpath] != args[2]->offset/ {
    this->dist = args[2]->offset > offset[map[probename],
    args[0]->ci_remote, args[1]->noi_curpath] ?
    args[2]->offset - offset[map[probename],
    args[0]->ci_remote, args[1]->noi_curpath] :
    offset[map[probename], args[0]->ci_remote,
    args[1]->noi_curpath] - args[2]->offset;
    @h[map[probename], args[0]->ci_remote] = quantize(this->dist);
    offset[map[probename], args[0]->ci_remote,
    args[1]->noi_curpath] =
    args[2]->offset;
  }
  END {printa("%s\nIP: %s%@d\n", @h);}'
dtrace -qn '
/* This script is useful for observing any system which uses NFS,
 * since this is NFSv3 only, at the moment. The script prints out
 * histograms of important metrics, like distances of seeks, and whether
 * or not those seeks happen in fixed offsets, which would normally suggest
 * that IO is not random in nature. If the distances are always very close,
 * it is worth looking into whether using larger record sizes on datasets
 * can improve performance, in particular if reads are often concentrated
 * in narrow bands within files. This tool is file based, which is most 
 * meaningful for us. It does not really make sense to compare offsets between
 * different files, since results will be nonsense.
 *
 * We use long for f[fileid3], becase the array stores offsets which are
 * of type offset3, which is an alias for uint64.
 */
long f[fileid3]; int lastdist[fileid3];

/* Store file ID and use it to key off of. */
:nfssrv:rfs3_read:entry {
  this->fid = (*args[1]).res_u.ok.file_attributes.attr.fileid ; 
}
:nfssrv:rfs3_read:entry / f[this->fid] == NULL / {
  f[this->fid] = args[0]->offset ;
  lastdist[this->fid] = 0 ;
}
:nfssrv:rfs3_read:entry
/ f[this->fid] != NULL && args[0]->offset != f[this->fid] / {
  this->prev_off  = f[this->fid] ;
  this->curr_off  = args[0]->offset ;
  this->gt        = this->curr_off > this->prev_off ;
  this->dist      = this->gt ? (this->curr_off - this->prev_off) :
                    -(this->curr_off - this->prev_off) ;
  /*
   * Uncomment below to troubleshoot, if offsets appear to be wrong.  
   * printf("0x%x 0x%x dist: 0x%x\n", 
   * this->prev_off, this->curr_off, this->dist);
   */
  @seq["READ"]    = quantize(this->dist == lastdist[this->fid]) ;
  @d["READ"]      = quantize(this->dist >> 10) ;
  @gt32["READ"]   = quantize(this->dist > 0x8000) ;
  @gt64["READ"]   = quantize(this->dist > 0x10000) ;
  @gt128["READ"]  = quantize(this->dist > 0x20000) ;
  @gt1m["READ"]   = quantize(this->dist > 0x100000) ;
  /* Update offset in the map and move on */
  f[this->fid]    = args[0]->offset ;
  lastdist[this->fid] = this->dist ;
}

:nfssrv:rfs3_write:entry {
  this->fid = (*args[1]).res_u.ok.file_wcc.after.attr.fileid ; 
}
:nfssrv:rfs3_write:entry / f[this->fid] == NULL / {
  f[this->fid] = args[0]->offset ;
  lastdist[this->fid] = 0 ;
}
:nfssrv:rfs3_write:entry
/ f[this->fid] != NULL && args[0]->offset != f[this->fid] / {
  this->prev_off  = f[this->fid] ;
  this->curr_off  = args[0]->offset ;
  this->gt        = this->curr_off > this->prev_off ;
  this->dist      = this->gt ? (this->curr_off - this->prev_off) :
                    -(this->curr_off - this->prev_off) ;
  /*
   * Uncomment below to troubleshoot, if offsets appear to be wrong.  
   * printf("0x%x 0x%x dist: 0x%x\n", 
   * this->prev_off, this->curr_off, this->dist);
   */
  @seq["WRITE"]   = quantize(this->dist == lastdist[this->fid]) ;
  @d["WRITE"]     = quantize(this->dist >> 10) ;
  @gt32["WRITE"]  = quantize(this->dist > 0x8000) ;
  @gt64["WRITE"]  = quantize(this->dist > 0x10000) ;
  @gt128["WRITE"] = quantize(this->dist > 0x20000) ;
  @gt1m["WRITE"]  = quantize(this->dist > 0x100000) ;
  /* Update offset in the map and move on */
  f[this->fid]    = args[0]->offset ;
  lastdist[this->fid] = this->dist ;
}
END {
  printa("[%s] Direction:\nSeq == 1 | Random == 0\n%@d\n", @seq) ;
  printa("[%s] Seek Dist (KB): %@d\n", @d) ;
  printa("[%s] Seek >  32K:\nYes == 1 | No == 0\n%@d\n", @gt32) ;
  printa("[%s] Seek >  64K:\nYes == 1 | No == 0\n%@d\n", @gt64) ;
  printa("[%s] Seek > 128K:\nYes == 1 | No == 0\n%@d\n", @gt128) ;
  printa("[%s] Seek >   1M:\nYes == 1 | No == 0\n%@d\n", @gt1m) ;
}'
dtrace -qn '
BEGIN {
    period = 5000; /* Results are printed every 5 seconds */
    /* How stable data is expected to be. */
    stable_how[0] = "NOSYNC";
    stable_how[1] = "DSYNC";
    stable_how[2] = "FSYNC";
    ts = walltimestamp;
}
nfsv3:::op-read-start, 
nfsv3:::op-write-start,
nfsv4:::op-read-start {
    tm[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
    opsz[args[0]->ci_remote, args[1]->noi_xid] = args[2]->count;
    flag[args[0]->ci_remote, args[1]->noi_xid] = 0;
    self->nfs=1;
}
/* sdt:::arc-miss gives info on direct users of the arc 
 *  if arc-miss, then we do an I/O */

sdt:::arc-miss /self->nfs/ { self->io = 1; self->nfs = 0 ;}

nfsv3:::op-write-start {
    flag[args[0]->ci_remote,args[1]->noi_xid] = args[2]->stable;
}
nfsv4:::op-write-start {
    flag[args[0]->ci_remote,args[1]->noi_xid] = args[2]->stable;
    tm[args[0]->ci_remote,args[1]->noi_xid] = timestamp;
    opsz[args[0]->ci_remote,args[1]->noi_xid] = args[2]->data_len;
}
nfsv3:::op-read-done, 
nfsv3:::op-write-done, 
nfsv4:::op-read-done, 
nfsv4:::op-write-done
/tm[args[0]->ci_remote,args[1]->noi_xid]/
{
        /* Total time from start of operation to end of same operation. */
        this->delta = (timestamp - tm[args[0]->ci_remote, 
                        args[1]->noi_xid]) / 1000;
        /* Flag indicates volatility mode for this operation, i.e.
         * `Unstable`, `Data Sync`, or `File Sync`.
         */
        this->flag =  flag[args[0]->ci_remote,args[1]->noi_xid];
        this->file =  args[1]->noi_curpath;

        this->type =  probename == "op-write-done" ? "W" : "R";
        /*   self->io ? "uncached" : "cached"  */
        this->type_cache = (probename == "op-write-done") ? 
                            "W": self->io ? "R" : "CR" ;

    /* ipaddr=ip[args[1]->noi_xid]; */
        this->ipaddr = inet_ntoa(&((struct sockaddr_in *)((struct svc_req *)arg0)->
            rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_addr.S_un.S_addr);
        this->port = ((struct sockaddr_in *)((struct svc_req *)arg0)->
            rq_xprt->xp_xpc.xpc_rtaddr.buf)->sin_port;

        
        @nfs_fio[ts, this->type, this->file, this->ipaddr, this->port] = 
            sum(opsz[args[0]->ci_remote, args[1]->noi_xid]);
        
        /* 
        @nfs_fir["R", this->file, this->ipaddr, this->port] = 
                sum((this->type == "R" ? 
                    opsz[args[0]->ci_remote, args[1]->noi_xid] : 0));
        
        @nfs_fiw["W", this->file, this->ipaddr, this->port] = 
                sum((this->type == "W" ? 
                    opsz[args[0]->ci_remote,args[1]->noi_xid] : 0));
        */
        
        /* store size along with max time so the can be correlated */
        /* this->overload = ( (this->delta) * (1000*1000*1000) +
                            opsz[args[0]->ci_remote,args[1]->noi_xid]);
        @nfs_mx[this->flag,"R"]=max( (this->type == "R" ? this->overload : 0));
        @nfs_mx[this->flag,"W"]=max( (this->type == "W" ? this->overload : 0));
        */
        /* Total Completion latency by type of IO and stability flag. */
        @nfs_qtclat[stable_how[this->flag], 
                this->type] = quantize(this->delta);

        this->size = opsz[args[0]->ci_remote,args[1]->noi_xid];
        this->buck = 512; /* We assume this is smallest possible size */
        this->buck = ( this->size > 512 ) ? 1024 : this->buck ;
        this->buck = ( this->size > 1024 ) ? 2048 : this->buck ;
        this->buck = ( this->size > 4096 ) ? 8192 : this->buck ;
        this->buck = ( this->size > 8192 ) ?  16384 : this->buck ;
        this->buck = ( this->size > 16384 ) ?  32768 : this->buck ;
        this->buck = ( this->size > 32768 ) ?  65536 : this->buck ;
        this->buck = ( this->size > 65536 ) ?  131072 : this->buck ;
        this->buck = ( this->size > 131072 ) ?  262144 : this->buck ;
        this->buck = ( this->size > 262144 ) ?  524288 : this->buck ;
        this->buck = ( this->size > 524288 ) ?  1048576 : this->buck ;
        /* this->type is a string, representing a Read, Write or Cache */
        @nfs_qtmsz[this->buck, this->type] = quantize(this->delta);
        @nfs_avtmsz[this->buck, this->type] = avg(this->delta);
        @nfs_cttmsz[this->buck, this->type] = count();

        @nfs_ctm[stable_how[this->flag], this->type_cache] = sum(this->delta); 
        @nfs_ct[stable_how[this->flag], this->type_cache] = count();
        @nfs_sz[stable_how[this->flag], this->type_cache] = sum(this->size);

        tm[args[0]->ci_remote,args[1]->noi_xid] = 0;
        opsz[args[0]->ci_remote,args[1]->noi_xid] = 0;
        flag[args[0]->ci_remote,args[1]->noi_xid]=0;
        self->io = 0;
}
tick-15sec {
    normalize(@nfs_sz, 1024);
    printf("\t---- NFS Total Size of Data by IO Type ----\n");
    printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
    printa("\tStability: %-10s IO Type: %-2s KBytes: %@d\n", @nfs_sz);
    printf("\t---- End NFS Total Size of Data by IO Type ----\n\n");

    printf("\t---- NFS Total Time by IO Type ----\n");
    printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
    printa("\tStability: %-10s IO Type: %-2s Time(uS): %@d\n", @nfs_ctm);
    printf("\t---- End NFS Total Time by IO Type ----\n\n");

    printf("\t---- NFS Data IOs Total by IO Type ----\n");
    printf("\tPossible Types: Cached Read=>CR|Read=>R|Write=>W\n");
    printa("\tStability: %-10s IO Type: %-2s Count: %@d\n", @nfs_ct);
    printf("\t---- End NFS Data IOs Time by IO Type ----\n\n");
    
    printf("\t---- NFS Normalized Average Time ----\n");
    printa("\tBucket(bytes): %-7d IO Type: %-2s Latency(uS): %@d\n",
    @nfs_avtmsz);
    printf("\t---- End NFS Normalized Average Time ----\n\n");

    printf("\t---- Completion Latency by Size and IO Type ----\n");
    printa("\t[%d] bytes\n\tIO Type: %-2s\n\tLatency(uS): %@d\n", 
          @nfs_qtmsz);
    printf("\t---- End Completion Latency by Size and IO Type ----\n\n");

    printf("\t---- Completion Latency Overall ----\n");
    printa("\tStability: %-10s\n\tIO Type: %-2s\n\tLatency(uS): %@d\n", 
          @nfs_qtclat);
    printf("\t---- End Latency Overall ----\n\n");

    printf("\t---- Count of IOs by Size and IO Type ----\n");
    printa("\tBucket(bytes): %-10d IO Type: %-2s %-8@d\n", @nfs_cttmsz);
    printf("\t---- End Count of IOs by Size and IO Type ----\n\n");

    printf("Timestamp, IO Type, File Path, Client IP Address, Port, Size(bytes)\n");
    printa("%-20Y %-2s %s %s %d %@d\n", @nfs_fio);

    trunc(@nfs_sz);
    trunc(@nfs_avtmsz);
    trunc(@nfs_qtmsz);
    trunc(@nfs_ctm);
    trunc(@nfs_ct);
    trunc(@nfs_cttmsz);
    trunc(@nfs_qtclat);
    trunc(@nfs_fio);
    ts = walltimestamp;
}'
dtrace -qn '
  /*
   * Measure how random or sequential NFSv3 IO is, and bucket it by
   * client IP address. It is useful to know which sizes are dominant
   * when IOs have consistent offset distance from previous offset,
   * i.e. io is sequential, as opposed to what sizes have constantly
   * varied IO distances.
   * Results should look something like this, where a count of both
   * is given for duration of this sample period.
   * CMD: Read IP: 172.16.1.34 RAND: 256 SEQ: 228
   * CMD: Write IP: 172.16.1.2 RAND: 1379 SEQ: 0
   * CMD: Write IP: 172.16.1.34 RAND: 1683 SEQ: 3
   */
  int lastdist[string, string, string];
  int lastpos[string, string, string];

  BEGIN {
    map["op-read-start"] = "Read";
    map["op-write-start"] = "Write";
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /!lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath]/ {
    lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = 0;
    lastpos[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = 0;
  }
  nfsv3:::op-read-start,nfsv3:::op-write-start
  /lastpos[map[probename], args[0]->ci_remote, 
  args[1]->noi_curpath] != args[2]->offset/ {
    this->dist = args[2]->offset > lastpos[map[probename],
    args[0]->ci_remote, args[1]->noi_curpath] ?
    args[2]->offset - lastpos[map[probename],
    args[0]->ci_remote, args[1]->noi_curpath] :
    lastpos[map[probename], args[0]->ci_remote,
    args[1]->noi_curpath] - args[2]->offset;
    /* Determine wether previous step and last step were same, i.e. */
    this->seq = this->dist == 
    lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] ? 1 : 0;
    @rand[map[probename], args[0]->ci_remote] =
    sum(this->seq == 0 ? 1 : 0);
    @seq[map[probename], args[0]->ci_remote] =
    sum(this->seq == 1 ? 1 : 0);
    lastpos[map[probename], args[0]->ci_remote, args[1]->noi_curpath] =
    args[2]->offset;
    lastdist[map[probename], args[0]->ci_remote, args[1]->noi_curpath] = 
    this->dist;
  }
  END {printa("CMD: %s IP: %s RAND: %@d SEQ: %@d\n", @rand, @seq);}'
  dtrace -qn '
  int self ts[string, int];

    nfsv3:::op-read-start, nfsv3:::op-write-start, nfsv3:::op-commit-start {
      self->ts[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
    }
    nfsv3:::op-read-done, nfsv3:::op-write-done, nfsv3:::op-commit-done
    /self->ts[args[0]->ci_remote, args[1]->noi_xid] / {
    @t[probefunc] = quantize((timestamp - 
            self->ts[args[0]->ci_remote, args[1]->noi_xid]) / 1000);
    self->ts[args[0]->ci_remote, args[1]->noi_xid] = 0;
    }
    tick-10sec {
      printa("%s\n%@d\n", @t);
      trunc(@t);
    }'
dtrace -qn '
    BEGIN {
        printf("timestamp,max.pend.reqs,max.actv.threads,percent.pool.free\n");
    }
    ::svc_xprt_qput:entry {
        enq = 1;
        this->p_reqs = args[0]->p_reqs; 
        /* ++this->p_reqs; This may be bumped on entry 
                        into routine, but after the entry probe is triggered. 
                        Further validation of this is necessary. 
        */
        this->pct_used = (args[0]->p_threads - args[0]->p_asleep) > 0 
        ? 100 * (args[0]->p_threads - args[0]->p_asleep) / args[0]->p_maxthreads
        : 1;
        this->pct_free = this->pct_used < 100 ? 100 - this->pct_used : 0;
        @p_reqs     = max(this->p_reqs);
        @actv_thr   = max(args[0]->p_threads - args[0]->p_asleep);
        @pct_free   = min(this->pct_free);
    }

    tick-5sec /enq/ {
        printf("%Y,", walltimestamp);
        printa("%@d,%@d,%@d\n", @p_reqs, @actv_thr, @pct_free);
        trunc(@p_reqs); 
        trunc(@actv_thr); 
        trunc(@pct_free);
        enq = 0;
    }'
dtrace -qn '
  self int x[int]; 
  int cnt1; 
  int cnt2;

  BEGIN {
    cnt1 = 0; /* should not be needed, but
    cnt2 = 0;  * just in case, extra-careful. */
  }
  
  nfsv3:::op-read-start {
    self->x[tid] = 1;
    ts[args[0]->ci_remote, args[1]->noi_xid] = timestamp;
  }
  nfsv3:::op-read-done /self->x[tid] > 0/ {
    ++cnt1; 
    self->x[tid] = 0;
  }
  nfsv3:::op-read-done /ts[args[0]->ci_remote, args[1]->noi_xid] > 0/ {
    ++cnt2;
    ts[args[0]->ci_remote, args[1]->noi_xid] = 0;
  }
  tick-10sec {printf("%d ?= %d\n", cnt1, cnt2); exit(0);}
 '
dtrace -qn '
  char m[string];
  BEGIN {
    m["rfs3_read"]  = 0x52;
    m["rfs3_write"] = 0x57;
  }
  ::rfs3_read:op-read-start {
    self->t = timestamp; self->arg2r = args[2];  self->a = args[0]->ci_remote;
    @qiosz[m[probefunc], args[0]->ci_remote] = quantize(args[2]->count);
  }
  ::rfs3_write:op-write-start {
    self->t = timestamp; self->arg2w = args[2]; self->a = args[0]->ci_remote;
    @qiosz[m[probefunc], args[0]->ci_remote] = quantize(args[2]->count);
  }
  ::rfs3_read:op-read-done /self->t/ {
    this->delta = (timestamp - self->t) / 1000;
    this->b = self->arg2r->count;
    @q[m[probefunc], self->a] = quantize(this->delta);
    
    @av[m[probefunc], self->a] = avg(this->delta);
    self->a = 0; self->s = 0; self->arg2r = 0;
  }
  ::rfs3_write:op-write-done /self->t/ {
    this->delta = (timestamp - self->t) / 1000;
    this->b = self->arg2w->data.data_len;
    @q[m[probefunc], self->a] = quantize(this->delta);
    
    @av[m[probefunc], self->a]  = avg(this->delta);
    self->a = 0; self->s = 0; self->arg2w = 0;
  }
  END {
    printa("\t   %c - IO size(b)  client: %s avg: %@d(b)%@d\n", @av, @qiosz);
    printa("\t   %c - latency(μS) client: %s avg: %@d(μS)%@d\n", @av, @q);
  }'