This is a result of a best practices conversation I had recently. The goal is to observe resources, like time spent in kernel, v. user, number of syscalls, and their cumulative time, this seemingly trivial example problem here requires.
Basically, we build golocks.go
and measure on illumos-based system.
wallclock: 1103855(μS)
time locked cumulative: 26435(μS) | per goroutine: 2(μS)
goroutine blocked avg: 1006519(μS)
golocks sleeping on sched | time (μS):
value ------------- Distribution ------------- count
4 | 0
8 | 1
16 |@ 6
32 |@@@@@@@@@@@@@@ 66
64 |@@@@@@ 29
128 |@@@@@@@@ 36
256 |@ 6
512 | 0
1024 | 0
2048 | 1
4096 | 0
8192 |@@@@@@@@ 37
16384 | 1
32768 | 0
65536 | 0
131072 | 0
262144 | 0
524288 | 2
1048576 | 0
golocks sleeping on golocks | time (μS):
value ------------- Distribution ------------- count
2 | 0
4 |@@@ 5
8 |@@@@@ 10
16 |@@ 4
32 |@@ 4
64 |@@@ 6
128 |@@ 3
256 |@ 1
512 |@@@ 6
1024 |@@@ 5
2048 |@@@ 5
4096 |@@@@@@ 12
8192 |@@@@@ 9
16384 | 0
32768 | 0
65536 | 0
131072 | 0
262144 | 0
524288 |@@ 4
1048576 |@ 1
2097152 | 0
LWP: 2 sleep: 45(μS)
LWP: 7 sleep: 6574(μS)
LWP: 4 sleep: 32137(μS)
LWP: 1 sleep: 47954(μS)
LWP: 6 sleep: 2187241(μS)
LWP: 0 sleep: 2415351(μS)
LWP: 3 sleep: 2776542(μS)
All LWPs sleep: 7465847(μS)
dtrace -qn '
sched:::sleep /execname=="golocks"/ {
bedtime[curlwpsinfo->pr_addr] = timestamp;
}
sched:::wakeup /bedtime[args[0]->pr_addr]/ {
this->delta = timestamp - bedtime[args[0]->pr_addr];
@[stringof(args[1]->pr_fname), execname] =
quantize(this->delta/1000);
@slwp[curlwpsinfo->pr_lwpid] = sum(this->delta);
@sall = sum(this->delta);
bedtime[args[0]->pr_addr] = 0;
}
END {
normalize(@slwp, 1000); normalize(@sall, 1000);
printa("\t %s sleeping on %s | time (μS):%@d\n", @);
printa("LWP: %d sleep: %@d(μS)\n", @slwp);
printa("All LWPs sleep: %@d(μS)\n", @sall);
}' -c './golocks'
wallclock: 1096333(μS)
time locked cumulative: 35615(μS) | per goroutine: 3(μS)
goroutine blocked avg: 975303(μS)
migrated? yes | time (μS):
value ------------- Distribution ------------- count
512 | 0
1024 |@@@@@@@ 1
2048 | 0
4096 |@@@@@@@ 1
8192 |@@@@@@@ 1
16384 | 0
32768 | 0
65536 | 0
131072 | 0
262144 | 0
524288 |@@@@@@@@@@@@@@@@@@@@ 3
1048576 | 0
migrated? no | time (μS):
value ------------- Distribution ------------- count
8 | 0
16 |@ 3
32 |@@@@ 14
64 |@@@@@@@@@@@@@@@@@@@ 67
128 |@@@@@@@@@@@ 38
256 | 1
512 | 0
1024 | 1
2048 | 1
4096 |@ 3
8192 |@@@ 10
16384 |@ 2
32768 | 0
65536 | 0
131072 | 0
262144 | 0
524288 |@ 4
1048576 | 0
LWP: 6 cumulative sleep: 941688(μS)
LWP: 4 cumulative sleep: 961931(μS)
LWP: 7 cumulative sleep: 980740(μS)
LWP: 1 cumulative sleep: 1011001(μS)
LWP: 3 cumulative sleep: 1034742(μS)
LWP: 5 cumulative sleep: 1037925(μS)
LWP: 2 cumulative sleep: 1039610(μS)
dtrace -qn 'sched:::off-cpu /curlwpsinfo->pr_state == SSLEEP/ {
self->cpu = cpu;
self->ts = timestamp;
}
sched:::on-cpu /self->ts/ {
this->delta = timestamp - self->ts;
@[self->cpu == cpu ? "no" : "yes"] =
quantize(this->delta / 1000);
self->ts = 0;
self->cpu = 0;
@cumm = sum(this->delta);
}
END {
normalize(@cumm, 1000);
printa("\t migrated? %s | time (μS): %@d\n", @);
printa("cumulative sleep: %@d(μS)\n", @cumm);
}' -c './golocks'
wallclock: 1105054(μS)
time locked cumulative: 25859(μS) | per goroutine: 2(μS)
goroutine blocked avg: 1006175(μS)
------------------------------------------------------
SYSCALL(s) spent (kernel: 93% | userspace: 7%)
------------------------------------------------------
sigpending 3(μS) | avg: 3(μS)
getpid 4(μS) | avg: 4(μS)
sysi86 5(μS) | avg: 5(μS)
getrlimit 5(μS) | avg: 5(μS)
close 8(μS) | avg: 8(μS)
lwp_cond_broadcast 9(μS) | avg: 4(μS)
priocntlsys 11(μS) | avg: 3(μS)
sysconfig 13(μS) | avg: 3(μS)
brk 14(μS) | avg: 4(μS)
stat 16(μS) | avg: 16(μS)
getcwd 19(μS) | avg: 19(μS)
lwp_detach 23(μS) | avg: 3(μS)
lwp_continue 23(μS) | avg: 3(μS)
pollsys 24(μS) | avg: 8(μS)
read 25(μS) | avg: 8(μS)
setcontext 41(μS) | avg: 4(μS)
open 41(μS) | avg: 41(μS)
schedctl 57(μS) | avg: 8(μS)
sigaltstack 63(μS) | avg: 4(μS)
uucopy 103(μS) | avg: 17(μS)
lwp_create 173(μS) | avg: 28(μS)
lwp_sigmask 175(μS) | avg: 6(μS)
write 241(μS) | avg: 48(μS)
yield 401(μS) | avg: 33(μS)
sigaction 469(μS) | avg: 3(μS)
mmap 767(μS) | avg: 10(μS)
nanosleep 721658(μS) | avg: 31376(μS)
lwp_park 7211466(μS) | avg: 53418(μS)
all_syscalls 7935870(μS) | avg: 0(μS)
------------------------------------------------------
SYSCALL count
------------------------------------------------------
close = 1
getcwd = 1
getpid = 1
getrlimit = 1
open = 1
sigpending = 1
stat = 1
sysi86 = 1
lwp_cond_broadcast = 2
brk = 3
pollsys = 3
priocntlsys = 3
read = 3
sysconfig = 4
write = 5
lwp_continue = 6
lwp_create = 6
lwp_detach = 6
uucopy = 6
schedctl = 7
setcontext = 9
yield = 12
sigaltstack = 14
nanosleep = 23
lwp_sigmask = 27
mmap = 76
sigaction = 132
lwp_park = 135
package main
import (
"fmt"
"sync"
"time"
)
// #include <sys/time.h>
import "C"
func main() {
var m sync.Mutex
var wg sync.WaitGroup
var spent uint64
var counter int64
var spentBlocked uint64
const upperLimit = 10000
var beforeLoop = time.Now()
for i := 0; i < upperLimit; i++ {
wg.Add(1)
go func(idx int) {
sinner := time.Now()
m.Lock()
// Critical section here
counter++
m.Unlock() // Drop lock when doing IO
dinner := time.Since(sinner)
spent += uint64(dinner)
bb := C.gethrtime() // Avoid runtime's magic
time.Sleep(1000 * time.Millisecond)
ab := C.gethrtime() // Avoid runtime's magic again
spentBlocked += uint64(ab - bb)
wg.Done()
}(i)
}
wg.Wait()
human := time.Since(beforeLoop)
fmt.Printf("wallclock: %d(μS)\n", human/1000)
fmt.Printf("time locked cumulative: %d(μS) | per goroutine: %d(μS)\n",
spent/1000, spent/10000000)
fmt.Printf("goroutine blocked avg: %d(μS)\n",
(spentBlocked / upperLimit / 1000))
}
dtrace -qn '
long context[string];
/* dtrace itself needs to be accounted for */
syscall:::entry /execname != "dtrace"/
{ self->t = timestamp; }
syscall:::return /self->t/ {
this->delta = timestamp - self->t;
@[probefunc] = sum(this->delta); self->t = 0;
@["all_syscalls"] = sum(this->delta);
@avlat[probefunc] = avg(this->delta);
@avlat["all_syscalls"] = avg(1);
@ct[probefunc] = count();
/* @cttid[probefunc, tid] = count(); */
}
profile-995 {
/* @ticks["kernel=1 | user=0"] = quantize(arg0?1:0); */
context[arg0 ? "kernel" : "user"]++;
}
END {
this->kern_time = 100*context["kernel"] /
(context["user"] + context["kernel"]);
this->user_time = 100 - this->kern_time;
normalize(@, 1000); normalize(@avlat, 1000);
printf("------------------------------------------------------\n");
printf("\tSYSCALL(s) spent (kernel: %d%% | userspace: %d%%)\n",
this->kern_time, this->user_time);
printf("------------------------------------------------------\n");
printa("%-16s\t%@d(μS) | avg: %@d(μS)\n", @, @avlat);
printf("------------------------------------------------------\n");
printf("\tSYSCALL count\n");
printf("------------------------------------------------------\n");
printa("%s = %@d\n", @ct);
/* printa("%s | tid:%d = %@d\n", @cttid); */
}' -c './golocks'