Dtrace snippets for observation of ZFS TXG operations.
# dtrace -qn '::BEGIN {x["rand"] = 0} ::txg_delay:entry { @[walltimestamp] = count(); x["rand"] = 1; } tick-1sec /x["rand"] == 1/ {printa("%Y %@d\n", @); trunc(@); x["rand"] = 0; }'
#!/usr/sbin/dtrace -s
#pragma D option quiet
/*
This script should give us a quantized distribution of
time delta from io:::start to io:::done, i.e. amount of
time it takes to complete IO operations, separated by
reads and writes.
*/
BEGIN
{
start = timestamp;
}
io:::start
{
ts[args[0]->b_edev, args[0]->b_lblkno] = timestamp;
}
io:::done
/ts[args[0]->b_edev, args[0]->b_lblkno]/
{
this->delta = (timestamp - ts[args[0]->b_edev, args[0]->b_lblkno]) / 1000;
this->name = (args[0]->b_flags & (B_READ | B_WRITE)) == B_READ ?
"read " : "write ";
@q[this->name] = quantize(this->delta);
@a[this->name] = avg(this->delta);
@v[this->name] = stddev(this->delta);
@i[this->name] = count();
@b[this->name] = sum(args[0]->b_bcount);
ts[args[0]->b_edev, args[0]->b_lblkno] = 0;
}
END
{
printa(@q);
normalize(@i, (timestamp - start) / 1000000000);
normalize(@b, (timestamp - start) / 1000000000 * 1024);
printf("%-30s %11s %11s %11s %11s\n", "", "avg latency", "stddev",
"iops", "throughput");
printa("%-30s %@9uus %@9uus %@9u/s %@8uk/s\n", @a, @v, @i, @b);
}
// On a healthy system we should see these events roughly every 5 seconds.
// Give name of pool as single argument to this snippet.
# dtrace -qn '
txg-syncing
{
this->dp = (dsl_pool_t *)arg0;
}
txg-syncing
/this->dp->dp_spa->spa_name == $$1/
{
printf("%Y %4dMB of %4dMB used\n", walltimestamp, this->dp->dp_dirty_total >> 20,
`zfs_dirty_data_max >> 20);
}
'
// A more generic version where all pools are given to us as the syncs happen.
# dtrace -qn '
txg-syncing
{
this->dp = (dsl_pool_t *)arg0;
this->spa = *((dsl_pool_t *)arg0)->dp_spa;
}
txg-syncing
/this->spa.spa_name != NULL/
{
printf("%Y %-16s %4dMB of %4dMB used\n",
walltimestamp,
this->spa.spa_name,
this->dp->dp_dirty_total >> 20,
`zfs_dirty_data_max >> 20);
}
'