szaydel
6/10/2014 - 3:42 PM

Dtrace snippets for observation of ZFS TXG operations.

Dtrace snippets for observation of ZFS TXG operations.

# dtrace -qn '::BEGIN {x["rand"] = 0} ::txg_delay:entry { @[walltimestamp] = count(); x["rand"] = 1; } tick-1sec /x["rand"] == 1/ {printa("%Y %@d\n", @); trunc(@); x["rand"] = 0; }'
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
  This script should give us a quantized distribution of
  time delta from io:::start to io:::done, i.e. amount of
  time it takes to complete IO operations, separated by
  reads and writes.
*/
BEGIN
{
        start = timestamp;
}

io:::start
{
        ts[args[0]->b_edev, args[0]->b_lblkno] = timestamp;
}

io:::done
/ts[args[0]->b_edev, args[0]->b_lblkno]/
{
        this->delta = (timestamp - ts[args[0]->b_edev, args[0]->b_lblkno]) / 1000;
        this->name = (args[0]->b_flags & (B_READ | B_WRITE)) == B_READ ?
            "read " : "write ";

        @q[this->name] = quantize(this->delta);
        @a[this->name] = avg(this->delta);
        @v[this->name] = stddev(this->delta);
        @i[this->name] = count();
        @b[this->name] = sum(args[0]->b_bcount);

        ts[args[0]->b_edev, args[0]->b_lblkno] = 0;
}

END
{
        printa(@q);

        normalize(@i, (timestamp - start) / 1000000000);
        normalize(@b, (timestamp - start) / 1000000000 * 1024);

        printf("%-30s %11s %11s %11s %11s\n", "", "avg latency", "stddev",
            "iops", "throughput");
        printa("%-30s %@9uus %@9uus %@9u/s %@8uk/s\n", @a, @v, @i, @b);
}
// On a healthy system we should see these events roughly every 5 seconds.
// Give name of pool as single argument to this snippet.
# dtrace -qn '
txg-syncing
{
        this->dp = (dsl_pool_t *)arg0;
}

txg-syncing
/this->dp->dp_spa->spa_name == $$1/
{
        printf("%Y %4dMB of %4dMB used\n", walltimestamp, this->dp->dp_dirty_total >> 20,
            `zfs_dirty_data_max >> 20);
}
'

// A more generic version where all pools are given to us as the syncs happen.
# dtrace -qn '
txg-syncing
{
        this->dp = (dsl_pool_t *)arg0;
        this->spa = *((dsl_pool_t *)arg0)->dp_spa;
}

txg-syncing
/this->spa.spa_name != NULL/
{
        printf("%Y %-16s %4dMB of %4dMB used\n", 
            walltimestamp,
            this->spa.spa_name, 
            this->dp->dp_dirty_total >> 20,
            `zfs_dirty_data_max >> 20);
}
'