michabbb
12/7/2014 - 7:17 PM

In case someone is interested in monitoring a crate cluster with nagios (and php) based on this thread: https://github.com/crate/crate/issue

In case someone is interested in monitoring a crate cluster with nagios (and php) based on this thread: https://github.com/crate/crate/issues/1396#issuecomment-64418128 - I tried to do the same with PHP. Because (right now) I am not that familiar with AngularJS, I cannot ensure that everything is translated correctly, so please be aware that this is just the "first try" and not the final version. - this is not a stand-alone script; it only demonstrates how it's done - requires: https://github.com/michabbb/php-crate

<?php
use macropage\helper\crate as crate;

/**
 * Class Mode_nagios_crate_db_cluster_status
 *
 * Nagios check that derives one overall health verdict for a Crate cluster
 * from information_schema.tables and sys.shards and reports it through
 * StopWithMessage().
 */
class Mode_nagios_crate_db_cluster_status extends Mode_nagios {

    /**
     * Query table and shard metadata and report the cluster health.
     *
     * Connection settings are read from $this->aConfig['jxmldb.rest.api']
     * ('pool' and 'url'). When the 'debug' entry of $this->Params is set, the
     * raw query results and all intermediate counters are printed.
     *
     * Exactly one StopWithMessage() call is made per invocation, with
     * STATE_OK, STATE_WARNING or STATE_CRITICAL.
     *
     * NOTE(review): StopWithMessage() presumably terminates the script; this
     * method no longer depends on that, because it decides on a single
     * verdict first and reports it once.
     */
    public function crate_db_cluster_status() {

        $crate = new crate\crate($this->aConfig['jxmldb.rest.api']['pool'],$this->aConfig['jxmldb.rest.api']['url']);
        $tableStmt = $crate->sql('select table_name, number_of_shards, number_of_replicas, schema_name, partitioned_by from information_schema.tables where schema_name in (\'doc\', \'blob\')');
        $shardStmt = $crate->sql('select table_name, schema_name, sum(num_docs), "primary", avg(num_docs), count(*), state, sum(size) from sys.shards group by table_name, schema_name, "primary", state  order by table_name, "primary", state');

        $debug = \utils::array_key_exists_and_not_empty('debug',$this->Params);
        if ($debug) {
            print_r($shardStmt);
            print_r($tableStmt);
        }

        // Both queries must have succeeded and returned rows, otherwise no
        // statement about the cluster can be made.
        if (
                (!$tableStmt['state'])
             || (!$shardStmt['state'])
             || ($shardStmt['status']!=200)
             || ($tableStmt['status']!=200)
             || (!$tableStmt['numrows'])
             || (!$shardStmt['numrows'])
        ) {
            $this->StopWithMessage('===> UNABLE TO CHECK CLUSTER!',STATE_CRITICAL,"\n====> ","\n\n");
            return;
        }

        $partitioned = 0;
        $numberOfChards = 0;
        foreach ($tableStmt['result'] as $table) {
            // partitioned_by may arrive as an array of column names or as a
            // scalar/null; strlen() on an array would fail, so test both forms.
            $partitionedBy = $table['partitioned_by'];
            if (is_array($partitionedBy)) {
                $isPartitioned = (count($partitionedBy) > 0);
            } else {
                $isPartitioned = (strlen((string)$partitionedBy) > 0);
            }
            if ($isPartitioned) $partitioned++;
            $numberOfChards += $table['number_of_shards'];
        }

        $shards_started = 0;
        $shards_unassigned = 0;
        $activePrimaryShards = 0;
        $numActivePrimaryShards = 0;
        $underreplicatedShards = 0;
        $tables = array();
        $shards_primary = array();
        // Each row is one (table, schema, primary, state) group of sys.shards.
        foreach ($shardStmt['result'] as $shard) {
            // Key by schema AND table so equally named tables in different
            // schemas (doc/blob) cannot mask each other.
            $tableKey = $shard['schema_name'] . '.' . $shard['table_name'];
            $shardState = $shard['state'];
            $isActive = ($shardState == "STARTED")
                     || ($shardState == "RELOCATING")
                     || ($shardState == "");

            $tables[$tableKey] = 1;
            if ($shardState == "STARTED") $shards_started++;
            if ($shardState == "UNASSIGNED") $shards_unassigned++;

            if ($shard['primary']) {
                $shards_primary[$tableKey] = 1;
                if ($isActive) {
                    $activePrimaryShards++;
                    $numActivePrimaryShards += $shard['count(*)'];
                }
            } elseif (!$isActive) {
                $underreplicatedShards++;
            }
        }

        // True when every table seen in sys.shards has at least one primary group.
        $all_primaries = (count(array_diff_key($tables, $shards_primary)) === 0);
        $missing_shards = ($numberOfChards-$numActivePrimaryShards);

        if ($debug) {
            // var_export() returns the text; var_dump() would print it at once
            // and contribute nothing to the concatenation.
            echo "all_primaries: " . var_export($all_primaries, true) . "\n";
            echo "partitioned: " . $partitioned . "\n";
            echo "shards_started: " . $shards_started . "\n";
            echo "activePrimaryShards: " . $activePrimaryShards . "\n";
            echo "numActivePrimaryShards: " . $numActivePrimaryShards . "\n";
            echo "numberOfChards: " . $numberOfChards . "\n";
            echo "missing_shards: " . $missing_shards . "\n";
            echo "underreplicatedShards: " . $underreplicatedShards . "\n";
        }

        /**
         * https://github.com/crate/crate/issues/1396#issuecomment-64418128
         * https://github.com/crate/crate-admin/blob/master/app/scripts/services/tableinfo.js#L67
         * https://github.com/crate/crate-admin/blob/master/app/scripts/services/tableinfo.js#L181
         *
         * this.health = function health() {
         *   if (this.partitioned && this.startedShards() === 0) return 'good';
         *   if (this.primaryShards().length === 0) return 'critical';
         *   if (this.missingShards() > 0) return 'critical';
         *   if (this.unassignedShards() > 0 || this.underreplicatedShards()) return 'warning';
         *   return 'good';
         * };
         */

        // The first matching rule wins, in the same order as the reference above.
        if (($partitioned) && (!$shards_started)) {
            $message = 'HEALTH: GOOD';
            $exitState = STATE_OK;
        } elseif (!$all_primaries) {
            $message = 'HEALTH: CRITICAL (missing primary shards)';
            $exitState = STATE_CRITICAL;
        } elseif ($missing_shards > 0) {
            // Only a positive difference means shards are missing; a negative
            // one must not raise an alarm.
            $message = 'HEALTH: CRITICAL (missing missing_shards)';
            $exitState = STATE_CRITICAL;
        } elseif (($shards_unassigned) || ($underreplicatedShards)) {
            $message = 'HEALTH: WARNING (shards_unassigned or underreplicatedShards)';
            $exitState = STATE_WARNING;
        } else {
            $message = 'HEALTH: GOOD';
            $exitState = STATE_OK;
        }

        $this->StopWithMessage($message,$exitState,"\n====> ","\n\n");
    }
}