szaydel
11/14/2017 - 2:08 PM

[zfs_read and zfs_write Dtrace snippets] Measurements of various data from zfs_read and zfs_write functions #tags: zfs, zfs_read, zfs_write,

[zfs_read and zfs_write Dtrace snippets] Measurements of various data from zfs_read and zfs_write functions #tags: zfs, zfs_read, zfs_write, dtrace

dtrace -qn '
#pragma D option dynvarsize=8m

string direction[string] ;
int gotdata ;

  BEGIN {
    printf("timestamp,pool,ioType,lt100us,lt1000us,lt10000us,lt100ms,lt1000ms,gt1000ms,blkszle1k,blksz2k,blksz4k,blksz8k,blksz16k,blksz32k,blksz64k,blksz128k,blksz256k,blkszgt256k,ioCount\n") ;
    direction["zfs_read"]   = "R" ;
    direction["zfs_write"]  = "W" ;
    timenow = walltimestamp / 1000000000;
  }

  ::zfs_read:entry,::zfs_write:entry
  /strstr(strtok(args[0]->v_path, "/storage/"), "p") != NULL/ {
    self->b = args[1]->uio_resid ; /* Number of bytes to read or write */
    self->p = strtok(args[0]->v_path, "/storage/") ; /* Should be pool name */
    self->s = timestamp ;
    gotdata = 1 ;
}

  ::zfs_read:return,::zfs_write:return /self->s/ {
    /*
     * Delta is converted from nanoseconds to microseconds, since we
     * expect few IOs to complete in a matter of a few nanoseconds,
     * operating with microseconds istead makes sense.
     */
    this->delta = (timestamp - self->s) / 1000 ;
    this->n = self->p ;
    @cnt[timenow, stringof(this->n), direction[probefunc]] = count();
    /*
     * Histogram with 6 bins representing distribution of latencies
     * for issued IOs. Bins are from 1000us< to >1000ms.
     * We use the sum() function to increment by 1 each time a value 
     * matches width of a bucket, otherwise we leave unchanged.
     */
    @lt100us[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta <= 100 ? 1 : 0) ;
    @lt1000us[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta > 100 && this->delta <= 1000 ? 1 : 0) ;
    @lt10000us[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta > 1000 && this->delta <= 10000 ? 1 : 0) ;
    @lt100ms[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta > 10000 && this->delta <= 100000 ? 1 : 0) ;
    @lt1000ms[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta > 100000 && this->delta <= 1000000 ? 1 : 0) ;
    @gt1000ms[timenow, stringof(this->n), direction[probefunc]] =
        sum(this->delta > 1000000 ? 1 : 0) ;
    /*
     * Histogram with 10 bins representing distibution of IO sizes 
     * for issued IOs. Bins are from 1K< to >256K.
     * We use the sum() function to increment by 1 each time a value 
     * matches width of a bucket, otherwise we leave unchanged.
     */

    @blkszle1k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b <= 0x400 ? 1 : 0) ;

    @blksz2k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x400 && self->b <= 0x800 ? 1 : 0) ;

    @blksz4k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x800 && self->b <= 0x1000 ? 1 : 0) ;
    
    @blksz8k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x1000 && self->b <= 0x2000 ? 1 : 0) ;
    
    @blksz16k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x2000 && self->b <= 0x4000 ? 1 : 0) ;
    
    @blksz32k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x4000 && self->b <= 0x8000 ? 1 : 0) ;
    
    @blksz64k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x8000 && self->b <= 0x10000 ? 1 : 0) ;

    @blksz128k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x10000 && self->b <= 0x20000 ? 1 : 0) ;

    @blksz256k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x20000 && self->b <= 0x40000 ? 1 : 0) ;

    @blkszgt256k[timenow, stringof(this->n), direction[probefunc]] = 
        sum(self->b > 0x40000 ? 1 : 0) ;

    self->b     = 0 ;
    self->p     = 0 ;
    self->s     = 0 ;
  }

  tick-1sec /gotdata/ {
    printa("%d,%s,%s,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d\n", 
            @lt100us, @lt1000us, @lt10000us, @lt100ms, @lt1000ms, 
            @gt1000ms, @blkszle1k, @blksz2k, @blksz4k, @blksz8k, @blksz16k,
            @blksz32k, @blksz64k, @blksz128k, @blksz256k, @blkszgt256k, @cnt
            ) ;
    trunc(@lt100us) ; trunc(@lt1000us) ; trunc(@lt10000us) ;
    trunc(@lt100ms) ; trunc(@lt1000ms) ; trunc(@gt1000ms) ;
    trunc(@blkszle1k) ; trunc(@blksz2k) ; trunc(@blksz4k) ;
    trunc(@blksz8k) ; trunc(@blksz16k) ; trunc(@blksz32k) ;
    trunc(@blksz64k) ; trunc(@blksz128k) ; trunc(@blksz256k) ;
    trunc(@blkszgt256k) ; trunc(@cnt) ;
    gotdata = 0 ;
  }

  /* 
   * Advance clock each second, establishing a 1 second 
   * resolution for samples.
   */
  tick-1sec {
      timenow = walltimestamp / 1000000000;
  }'