szaydel
4/29/2020 - 8:55 PM

Zeek-related cgroup statistics converted to Prometheus text format

#!/bin/bash

# Generate Prometheus-compatible output that can be consumed by the
# node_exporter textfile collector. This script needs to be run periodically,
# for example via cron.

#
# format_memory_usage_metrics: Reads memory.usage_in_bytes from every bro-*
# cgroup and prints the values in Prometheus text format on stdout.
#
# Arguments:
#   $1 - (optional) directory that contains the bro-* cgroup directories;
#        defaults to /sys/fs/cgroup/memory.
# Outputs:
#   The HELP and TYPE lines for zeek_memory_usage_bytes, followed by one
#   sample per cgroup, labelled with the cgroup directory name.
#
function format_memory_usage_metrics {
    local cgroup_root="${1:-/sys/fs/cgroup/memory}"
    local cgroup_dir usage_file metric_value process

    echo "# HELP zeek_memory_usage_bytes Memory usage reported by cgroup in memory.usage_in_bytes"
    echo "# TYPE zeek_memory_usage_bytes gauge"
    for cgroup_dir in "${cgroup_root}"/bro-*; do
        usage_file="${cgroup_dir}/memory.usage_in_bytes"
        # An unmatched glob is left as the literal pattern, and a cgroup may
        # lack a readable usage file; skip both instead of reporting a bogus
        # sample.
        [[ -r "${usage_file}" ]] || continue
        metric_value="$(<"${usage_file}")"
        # Never emit a made-up 0 for an empty or non-numeric value.
        [[ "${metric_value}" =~ ^[0-9]+$ ]] || continue
        process="${cgroup_dir##*/}"
        # The value is validated above, so print it verbatim; %d would parse
        # a leading zero as octal.
        printf 'zeek_memory_usage_bytes{process="%s"} %s\n' "${process}" "${metric_value}"
    done
}

#
# format_detailed_memory_metrics: Takes the non-total_ stats listed below from
# the memory.stat file of every bro-* cgroup and prints them in Prometheus text
# format on stdout.
#
# Arguments:
#   $1 - (optional) directory that contains the bro-* cgroup directories;
#        defaults to /sys/fs/cgroup/memory.
# Outputs:
#   For every known stat, in a fixed order, the HELP and TYPE lines for
#   zeek_<stat>, followed by one sample per cgroup that reports that stat.
#
function format_detailed_memory_metrics {
    local cgroup_root="${1:-/sys/fs/cgroup/memory}"
    local cgroup_dir stat_file process key value

    # Associative-array iteration order is unspecified, so the emit order is
    # kept in a separate list to make the output stable between runs.
    local -a metric_order=(
        cache rss rss_huge mapped_file pgpgin pgpgout swap dirty writeback
        inactive_anon active_anon inactive_file active_file unevictable
    )
    # NOTE(review): pgpgin/pgpgout are described as event counts, which looks
    # like counter semantics; the gauge type is kept so existing consumers
    # are unaffected - confirm before changing.
    local -A metric_description=(
        ["cache"]="# of bytes of page cache memory"
        ["rss"]="# of bytes of anonymous and swap cache memory (includes transparent hugepages)"
        ["rss_huge"]="# of bytes of anonymous transparent hugepages"
        ["mapped_file"]="# of bytes of mapped file (includes tmpfs/shmem)"
        ["pgpgin"]="# of charging events to the memory cgroup. The charging event happens each time a page is accounted as either mapped anon page(RSS) or cache page(Page Cache) to the cgroup"
        ["pgpgout"]="# of uncharging events to the memory cgroup. The uncharging event happens each time a page is unaccounted from the cgroup"
        ["swap"]="# of bytes of swap usage"
        ["dirty"]="# of bytes that are waiting to get written back to the disk"
        ["writeback"]="# of bytes of file/anon cache that are queued for syncing to disk"
        ["inactive_anon"]="# of bytes of anonymous and swap cache memory on inactive LRU list"
        ["active_anon"]="# of bytes of anonymous and swap cache memory on active LRU list"
        ["inactive_file"]="# of bytes of file-backed memory on inactive LRU list"
        ["active_file"]="# of bytes of file-backed memory on active LRU list"
        ["unevictable"]="# of bytes of memory that cannot be reclaimed (mlocked etc)"
    )
    local -a processes=()
    local -A stat_values=()

    # Pass 1: parse each memory.stat once with shell builtins instead of
    # spawning one grep per metric per cgroup.
    for cgroup_dir in "${cgroup_root}"/bro-*; do
        stat_file="${cgroup_dir}/memory.stat"
        # An unmatched glob is left as the literal pattern; skip it, along
        # with any cgroup whose stat file is not readable.
        [[ -r "${stat_file}" ]] || continue
        process="${cgroup_dir##*/}"
        processes+=("${process}")
        # The "|| [[ -n key ]]" keeps a final line that lacks a newline.
        while read -r key value _ || [[ -n "${key}" ]]; do
            # Only plain stat names can be one of ours; this also rejects
            # blank lines, which are not valid array subscripts.
            [[ "${key}" =~ ^[a-z_]+$ ]] || continue
            # Ignore stats we do not export (total_* and anything else).
            [[ -n "${metric_description[${key}]+set}" ]] || continue
            stat_values["${process}/${key}"]="${value}"
        done < "${stat_file}"
    done

    # Pass 2: emit metric by metric so each HELP/TYPE header is directly
    # followed by all of its samples.
    for key in "${metric_order[@]}"; do
        printf '# HELP zeek_%s %s\n' "${key}" "${metric_description[${key}]}"
        printf '# TYPE zeek_%s gauge\n' "${key}"
        for process in "${processes[@]}"; do
            value="${stat_values[${process}/${key}]-}"
            # We may not have some stats, like swap, in which case, skip.
            [[ -n "${value}" ]] || continue
            printf 'zeek_%s{process="%s"} %s\n' "${key}" "${process}" "${value}"
        done
    done
}

# Directory the metrics file is written to. Adjust as needed, either here or
# by setting TEXTFILE_COLLECTOR_DIR in the environment.
TEXTFILE_COLLECTOR_DIR="${TEXTFILE_COLLECTOR_DIR:-/run/node_exporter}"
PROM_FILE="${TEXTFILE_COLLECTOR_DIR}/zeek_memory_usage.prom"
PROM_TMP_FILE="${PROM_FILE}.$$"

# If not existing, create directory first (mkdir -p is a no-op otherwise).
if ! mkdir -p -- "${TEXTFILE_COLLECTOR_DIR}"; then
    echo "cannot create directory ${TEXTFILE_COLLECTOR_DIR}" >&2
    exit 1
fi

# Do not leave a stale temporary file behind if anything below fails. After a
# successful rename the file no longer exists and rm -f does nothing.
trap 'rm -f -- "${PROM_TMP_FILE}"' EXIT

# Note the start time of the script.
START="$(date +%s)"

output_format_memory_usage_metrics="$(format_memory_usage_metrics)"
output_format_detailed_memory_metrics="$(format_detailed_memory_metrics)"

# Write out metrics to a temporary file.
END="$(date +%s)"
if ! {
    printf '%s\n' "${output_format_memory_usage_metrics}"
    printf '%s\n' "${output_format_detailed_memory_metrics}"
    printf '%s\n' "# HELP zeek_memory_usage_duration_seconds Time in seconds spent collecting the Zeek cgroup memory metrics"
    printf '%s\n' "# TYPE zeek_memory_usage_duration_seconds gauge"
    printf 'zeek_memory_usage_duration_seconds %d\n' "$((END - START))"
    printf '%s\n' "# HELP zeek_memory_usage_last_run_seconds Unix timestamp of the last collection run"
    printf '%s\n' "# TYPE zeek_memory_usage_last_run_seconds gauge"
    printf 'zeek_memory_usage_last_run_seconds %d\n' "${END}"
} > "${PROM_TMP_FILE}"; then
    echo "cannot write ${PROM_TMP_FILE}" >&2
    exit 1
fi

# Rename the temporary file atomically.
# This avoids the node exporter seeing half a file.
if ! mv -- "${PROM_TMP_FILE}" "${PROM_FILE}"; then
    echo "cannot rename ${PROM_TMP_FILE} to ${PROM_FILE}" >&2
    exit 1
fi