#!/usr/bin/perl -w
#
# varnish_ r4439 (http://varnish-cache.org/browser/trunk/varnish-tools/munin/varnish_?rev=4439)
# with the following patch applied
# http://lists.varnish-cache.org/pipermail/varnish-dev/2009-December/002347.html
# http://lists.varnish-cache.org/pipermail/varnish-dev/attachments/20091216/841e78c9/attachment-0001.txt
# it seems mixing up the name of the instance with the title of the graph is a bad idea
# because -n identifies the directory to search for runtime files
#
# varnish_ - Munin plugin for multiple Varnish Servers
# Copyright (C) 2009  Redpill Linpro AS
#
# Author: Kristian Lyngstøl <kristian@redpill-linpro.com>
# Modified by Paul Mansfield so that it can be used
# with multiple varnish instances
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

=head1 NAME

varnish_ - Munin plugin to monitor various aspects of multiple varnish servers

=head1 APPLICABLE SYSTEMS

Varnish 2.0 or newer with varnishstat

=head1 CONFIGURATION

The plugin needs to be able to execute varnishstat.

The configuration section shows the defaults
  [varnish_*]
     env.varnishstat varnishstat
     env.name

env.varnishstat can be a full path to varnishstat if it's
not in the path already.

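env.name is blank (undefined) by default and can be used to specify a -n
name argument to varnishstat if multiple instances are running on the
same server.

env.graphname is blank (undefined) by default. If set, it is appended to
the title of every graph (as "TITLE - graphname"), which helps to tell
the graphs of multiple instances apart.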

A few aspects are not linked by default. They are marked as
'DEBUG' => 'yes' (or any other value). They are:

data_structures, vcl_and_purges, lru, objects_per_objhead,
obj_sendfile_vs_write, losthdr, esi, hcb, shm, shm_writes, objoverflow,
session, session_herd, allocations

You can link them yourself with something like this:
ln -s /foo/varnish_ /etc/munin/plugins/varnish_data_structures

=head1 INTERPRETATION

Each graph uses data from varnishstat.

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf suggest

=head1 VERSION

 $Id$

=head1 BUGS

The hit_rate graph requires munin r2040 or newer to display
correctly.

=head1 PATCHES-TO

The varnish-tools repository is the canonical upstream for this plugin.
Please send patches to Kristian Lyngstol <kristian@redpill-linpro.com>
and/or varnish-misc@projects.linpro.no for inclusion before adding to
munin trunk.

=head1 AUTHOR

Kristian Lyngstol <kristian@redpill-linpro.com>

=head1 LICENSE

GPLv2

=cut


use strict;

# Debug switch: when set to 1, a value that a graph defines but that is
# missing from varnishstat is reported instead of being skipped silently.
my $DEBUG = 0;

# When set to 1, 'suggest' lists every aspect, including the ones that
# are flagged with 'DEBUG'.
my $FULL_SUGGEST = 0;

# Command used to run varnishstat. env.varnishstat overrides the default,
# which relies on the executable being found in the path.
my $varnishstatexec = "varnishstat";
$varnishstatexec = $ENV{'varnishstat'} if exists $ENV{'varnishstat'};

# Multiple instance support. Both stay undefined unless the matching
# environment variable (env.name / env.graphname) is set.
my $varnishname = $ENV{'name'};
my $graphname = $ENV{'graphname'};

# Counter values and counter descriptions, keyed by counter name. Both
# are filled by populate_stats.
my %varnishstat;
my %varnishstatnames;

# The aspect this invocation works on; filled by set_aspect.
my $self;

# Keys that may appear on the top level of a graph definition. Config
# emits each of them as "graph_$foo $value\n".
my @graph_parameters = qw(title total order scale vlabel args);

# Keys that may appear on an individual value definition. They are handed
# to config untouched, as "$fieldname.$param $value\n".
#
# 'label' is not part of this list: it is handled separately because it
# falls back to the varnishstat description when left unset.
my @field_parameters = qw(graph min max draw cdef warning
                          colour info type);

# Data structure that defines all possible graphs (aspects) and how they
# are to be plotted. Every top-level entry is a graph/aspect. Each top-level
# graph MUST have 'title' and 'values' set.
#
# The 'values' hash must have at least one value definition. The actual
# value used is either fetched from varnishstat based on the value-name, or
# if 'rpn' is defined: calculated. 'type' SHOULD be set.
#
# Graphs with 'DEBUG' set to anything are omitted from 'suggest'.
#
# 'rpn' on values allows easy access to graphs consisting of multiple
# values from varnishstat. (Reverse polish notation). The RPN
# implementation only accepts +-*/ and varnishstat-values.
#
# With the exception of 'label', which is filled with the
# varnishstat-description if left undefined, any value left undefined will
# be left up to Munin to define/ignore/yell about.
#
# See munin documentation or rrdgraph/rrdtool for more information.
my %ASPECTS = (
    'request_rate' => {
        'title' => 'Request rates',
        'order' => 'cache_hit cache_hitpass cache_miss '
             . 'backend_conn backend_unhealthy '
             . 'client_req client_conn' ,
        'values' => {
            'client_conn' => {
                'type' => 'DERIVE',
                'min' => '0',
                'colour' => '444444',
                'graph' => 'ON'
            },
            'client_req' => {
                'type' => 'DERIVE',
                'colour' => '111111',
                'min' => '0'
            },
            'cache_hit' => {
                'type' => 'DERIVE',
                'draw' => 'AREA',
                'colour' => '00FF00',
                'min' => '0'
            },
            'cache_hitpass' => {
                'info' => 'Hitpass are cached passes: An '
                    . 'entry in the cache instructing '
                    . 'Varnish to pass. Typically '
                    . 'achieved after a pass in '
                    . 'vcl_fetch.',
                'type' => 'DERIVE',
                'draw' => 'STACK',
                'colour' => 'FFFF00',
                'min' => '0'
            },
            'cache_miss' => {
                'type' => 'DERIVE',
                'colour' => 'FF0000',
                'draw' => 'STACK',
                'min' => '0'
            },
            'backend_conn' => {
                'type' => 'DERIVE',
                'colour' => '995599',
                'min' => '0'
            },
            'backend_unhealthy' => {
                'type' => 'DERIVE',
                'min' => '0',
                'colour' => 'FF55FF'
            },
            's_pipe' => {
                'type' => 'DERIVE',
                'min' => '0',
                'colour' => '1d2bdf'
            },
            's_pass' => {
                'type' => 'DERIVE',
                'min' => '0',
                'colour' => '785d0d'
            }
        }
    },
    # The cdefs turn each counter into a percentage of client_req, which
    # itself is collected but not drawn.
    'hit_rate' => {
        'title' => 'Hit rates',
        'order' => 'client_req cache_hit cache_miss '
             . 'cache_hitpass' ,
        'vlabel' => '%',
        'args' => '-u 100 --rigid',
        'scale' => 'no',
        'values' => {
            'client_req' => {
                'type' => 'DERIVE',
                'min' => '0',
                'graph' => 'off'
            },
            'cache_hit' => {
                'type' => 'DERIVE',
                'min' => '0',
                'draw' => 'AREA',
                'cdef' => 'cache_hit,client_req,/,100,*'
            },
            'cache_miss' => {
                'type' => 'DERIVE',
                'draw' => 'STACK',
                'min' => '0',
                'cdef' => 'cache_miss,client_req,/,100,*'
            },
            'cache_hitpass' => {
                'type' => 'DERIVE',
                'draw' => 'STACK',
                'min' => '0',
                'cdef' => 'cache_hitpass,client_req,/,100,*'
            },
        }
    },
    'backend_traffic' => {
        'title' => 'Backend traffic',
        'values' => {
            'backend_conn' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'backend_unhealthy' => {
                'type' => 'DERIVE',
                'min' => '0',
                'warning' => ':1'
            },
            'backend_busy' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'backend_fail' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'backend_reuse' => {
                'type' => 'DERIVE',
                'min' => 0
            },
            'backend_recycle' => {
                'type' => 'DERIVE',
                'min' => 0
            },
            'backend_unused' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'backend_req' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'objects' => {
        'title' => 'Number of objects',
        'values' => {
            'n_object' => {
                'type' => 'GAUGE',
                'label' => 'Number of objects'
            },
            'n_objecthead' => {
                'type' => 'GAUGE',
                'label' => 'Number of object heads',
                'info' => 'Each object head can have one '
                    . 'or more object attached, '
                    . 'typically based on the Vary: header'
            }
        }
    },
    # The cdefs multiply the byte counters by 8 to match the bit/s vlabel.
    'transfer_rates' => {
        'title' => 'Transfer rates',
        'order' => 's_bodybytes s_hdrbytes',
        'args' => '-l 0',
        'vlabel' => 'bit/s',
        'values' => {
            's_hdrbytes' => {
                'type' => 'DERIVE',
                'label' => 'Header traffic',
                'draw' => 'STACK',
                'min' => '0',
                'info' => 'HTTP Header traffic. TCP/IP '
                    . 'overhead is not included.',
                'cdef' => 's_hdrbytes,8,*'
            },
            's_bodybytes' => {
                'type' => 'DERIVE',
                'draw' => 'AREA',
                'label' => 'Body traffic',
                'min' => '0',
                'cdef' => 's_bodybytes,8,*'
            }
        }
    },
    'threads' => {
        'title' => 'Thread status',
        'values' => {
            'n_wrk' => {
                'type' => 'GAUGE',
                'min' => '0',
                'warning' => '1:'
            },
            'n_wrk_create' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_wrk_failed' => {
                'type' => 'DERIVE',
                'min' => '0',
                'warning' => ':1'
            },
            'n_wrk_max' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_wrk_overflow' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_wrk_drop' => {
                'type' => 'DERIVE',
                'min' => '0',
                'warning' => ':1'
            }
        }
    },
    'memory_usage' => {
        'title' => 'Memory usage',
        'order' => 'sm_balloc sma_nbytes sms_nbytes',
        'total' => 'Total',
        'args' => '--base 1024',
        'values' => {
            'sm_balloc' => {
                'type' => 'GAUGE',
                'draw' => 'AREA'
            },
            'sma_nbytes' => {
                'type' => 'GAUGE',
                'draw' => 'STACK'
            },
            'sms_nbytes' => {
                'type' => 'GAUGE',
                'draw' => 'STACK'
            }
        }
    },
    # The cdef divides the uptime counter by 86400 to match the 'days'
    # vlabel.
    'uptime' => {
        'title' => 'Varnish uptime',
        'vlabel' => 'days',
        'scale' => 'no',
        'values' => {
            'uptime' => {
                'type' => 'GAUGE',
                'cdef' => 'uptime,86400,/'
            }
        }
    },
    # The only aspect whose value is calculated through 'rpn'.
    'objects_per_objhead' => {
        'title' => 'Objects per objecthead',
        'DEBUG' => 'yes',
        'values' => {
            'obj_per_objhead' => {
                'type' => 'GAUGE',
                'label' => 'Objects per object heads',
                'rpn' => [ 'n_object','n_objecthead','/' ]
            }
        }
    },
    'losthdr' => {
        'title' => 'HTTP Header overflows',
        'DEBUG' => 'yes',
        'values' => {
            'losthdr' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'obj_sendfile_vs_write' => {
        'title' => 'Objects delivered with sendfile() versus '
             . 'write()',
        'DEBUG' => 'yes',
        'values' => {
            'n_objsendfile' => {
                'type' => 'DERIVE',
                'min' => '0',
            },
            'n_objwrite' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'hcb' => {
        'title' => 'Critbit data',
        'DEBUG' => 'yes',
        'values' => {
            'hcb_nolock' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'hcb_lock' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'hcb_insert' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'esi' => {
        'title' => 'ESI',
        'DEBUG' => 'yes',
        'values' => {
            'esi_parse' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'esi_errors' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'objoverflow' => {
        'title' => 'Objects overflowing workspace',
        'DEBUG' => 'yes',
        'values' => {
            'n_objoverflow' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'session' => {
        'title' => 'Sessions',
        'DEBUG' => 'yes',
        'values' => {
            'sess_closed' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'sess_pipeline' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'sess_readahead' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'sess_linger' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'session_herd' => {
        'title' => 'Session herd',
        'DEBUG' => 'yes',
        'values' => {
            'sess_herd' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'shm_writes' => {
        'title' => 'SHM writes and records',
        'DEBUG' => 'yes',
        'values' => {
            'shm_records' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'shm_writes' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'shm' => {
        'title' => 'Shared memory activity',
        'DEBUG' => 'yes',
        'values' => {
            'shm_flushes' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'shm_cont' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'shm_cycles' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'allocations' => {
        'title' => 'Memory allocation requests',
        'DEBUG' => 'yes',
        'values' => {
            'sm_nreq' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'sma_nreq' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'sms_nreq' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'vcl_and_purges' => {
        'title' => 'VCL and purges',
        'DEBUG' => 'yes',
        'values' => {
            'n_backend' => {
                'type' => 'GAUGE'
            },
            'n_vcl' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_vcl_avail' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_vcl_discard' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_purge' => {
                'type' => 'GAUGE'
            },
            'n_purge_add' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_purge_retire' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_purge_obj_test' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_purge_re_test' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_purge_dups' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'expunge' => {
        'title' => 'Object expunging',
        'values' => {
            'n_expired' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_lru_nuked' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'lru' => {
        'title' => 'LRU activity',
        'DEBUG' => 'yes',
        'values' => {
            'n_lru_saved' => {
                'type' => 'DERIVE',
                'min' => '0'
            },
            'n_lru_moved' => {
                'type' => 'DERIVE',
                'min' => '0'
            }
        }
    },
    'data_structures' => {
        'DEBUG' => 'YES',
        'title' => 'Data structure sizes',
        'values' => {
            'n_srcaddr' => {
                'type' => 'GAUGE'
            },
            'n_srcaddr_act' => {
                'type' => 'GAUGE'
            },
            'n_sess_mem' => {
                'type' => 'GAUGE'
            },
            'n_sess' => {
                'type' => 'GAUGE'
            },
            'n_smf' => {
                'type' => 'GAUGE'
            },
            'n_smf_frag' => {
                'type' => 'GAUGE'
            },
            'n_smf_large' => {
                'type' => 'GAUGE'
            },
            'n_vbe_conn' => {
                'type' => 'GAUGE'
            },
            'n_bereq' => {
                'type' => 'GAUGE'
            }
        }
    }
);

# Populate %varnishstat with values and %varnishstatnames with
# descriptions by running "$varnishstatexec -1" and parsing its output.
#
# If $varnishname is set it is passed on as the -n argument. The name is
# single-quoted for the shell, so a name containing whitespace or shell
# metacharacters reaches varnishstat as one unmodified argument.
#
# Lines that do not match the expected "name value rate description"
# layout are skipped. If the command does not exit cleanly a warning is
# printed on STDERR; whatever was parsed is kept.
sub populate_stats
{
    my $command = "$varnishstatexec -1";
    if ($varnishname) {
        # Inside single quotes the shell only treats the single quote
        # itself as special: close the quote, emit an escaped quote and
        # open the quote again.
        (my $quoted = $varnishname) =~ s/'/'\\''/g;
        $command .= " -n '$quoted'";
    }
    foreach my $line (`$command`) {
        chomp($line);
        # Columns: name, integer value, a rate column that must contain
        # a dot, and the description (rest of the line).
        if ($line =~ /^([^ ]*)\s+(\d+)\s+(\d*\.\d*)\s+(.*)$/) {
            $varnishstat{"$1"} = $2;
            $varnishstatnames{"$1"} = $4;
        }
    }
    # $? still holds the status of the backtick command at this point.
    if ($? != 0) {
        print STDERR "Warning: '$command' did not exit cleanly; "
                   . "statistics may be missing.\n";
    }
}

# Bail-function: print an optional error message followed by a short
# usage hint on STDERR, then exit with status 1.
#
# Arguments: the (optional) words of the error message. They are joined
# with spaces. Nothing extra is printed if the result is empty.
#
# Never returns.
sub usage
{
    # An empty @_ interpolates to the empty string, so this single test
    # covers both "no arguments" and "blank message". (defined(@array) is
    # a fatal error in current versions of Perl and must not be used.)
    my $message = "@_";
    if ($message ne "") {
        print STDERR $message . "\n\n";
    }
    print STDERR "Known arguments: suggest, config, autoconf.\n";
    print STDERR "Run with suggest to get a list of known aspects.\n";
    exit 1;
}

# Answer the autoconf question: print 'yes' and exit with 0 if "which"
# finds $varnishstatexec, otherwise print 'no' together with a
# human-readable reason and exit with 1.
#
# Arguments are ignored. Never returns.
sub autoconf
{
    my $located = `which $varnishstatexec`;
    unless ($located eq '') {
        print "yes\n";
        exit 0;
    }
    print "no (which $varnishstatexec returns blank)\n";
    exit 1;
}

# Print the name of every aspect worth suggesting, one per line.
# Aspects that have 'DEBUG' defined are left out unless $FULL_SUGGEST
# is 1.
sub suggest
{
    my $everything = ($FULL_SUGGEST == 1);
    my @wanted = grep {
        $everything || !defined($ASPECTS{$_}{'DEBUG'})
    } keys %ASPECTS;
    foreach my $aspect (@wanted) {
        print "$aspect\n";
    }
}

# Print one entry of a two-dimensional hash if it exists.
#
# Arguments:
#   arg1 - reference to the hash of hashes
#   arg2 - first-level key
#   arg3 - second-level key
#   arg4 - optional name to print in front of the value (ie:
#          arg4=graph_title). If undefined, "arg2.arg3" is used, which is
#          the format plugins use for fields.
#
# If $graphname is set and the printed name is 'graph_title',
# " - $graphname" is appended to the value.
#
# Returns 0 if the entry is not defined, a true value after printing.
sub print_if_exist
{
    my ($table_ref, $entry, $param, $override) = @_;
    my %table = %{$table_ref};
    my $name = defined($override) ? $override : "$entry.$param";

    return 0 unless defined($table{$entry}{$param});

    print "$name $table{$entry}{$param}";
    if ($graphname and $name eq 'graph_title') {
        print " - $graphname";
    }
    # Keep this print last: its result is the return value of the sub.
    print "\n";
}

# Print the munin configuration of one aspect: the graph category, all
# top-level graph parameters that are set, and for every usable value its
# label followed by the field parameters that are set.
#
# Argument: the name of the aspect. Bails out through usage() if the
# aspect is unknown.
sub get_config
{
    my ($aspect) = @_;

    # set_aspect only verifies the aspect when there is no argument
    # (suggest/autoconf work without a valid aspect), so check again.
    usage("No such aspect") unless defined($ASPECTS{$aspect});

    my %fields = %{$ASPECTS{$aspect}{'values'}};

    print "graph_category Varnish\n";
    foreach my $param (@graph_parameters) {
        print_if_exist(\%ASPECTS, $aspect, $param, "graph_$param");
    }

    foreach my $name (keys %fields) {
        # A value is usable if varnishstat reports it or if it is
        # calculated through an RPN definition.
        my $calculated = defined($fields{$name}{'rpn'});
        unless (defined($varnishstat{$name}) || $calculated) {
            print "ERROR: $name not part of varnishstat.\n" if $DEBUG;
            next;
        }

        # No explicit label: use the varnishstat description.
        print_if_exist(\%fields, $name, 'label')
            or print "$name.label $varnishstatnames{$name}\n";

        foreach my $param (@field_parameters) {
            print_if_exist(\%fields, $name, $param);
        }
    }
}

# Read the aspect from the name the plugin was called as ($0), store it
# in $self and verify it.
#
# The file name format is varnish_NAME__aspect or varnish_aspect; a NAME
# part is accepted but ignored. The directory part of $0 is optional, so
# the plugin also works when started by its bare file name.
#
# An unknown aspect is only an error (usage(), exit 1) when there are no
# command line arguments: suggest and autoconf do not need a valid aspect,
# and config checks again by itself.
sub set_aspect
{
    $self = $0;
    # Strip everything up to and including the last "varnish_" that
    # starts a path component.
    $self =~ s/^(?:.*\/)?varnish_//;

    if ($self =~ /^(\w+)__(.*)$/)
    {
        # $1 is the instance name; it is deliberately not used as the
        # graph name (see env.name / env.graphname).
        $self = $2;
    }
    if (!defined($ASPECTS{$self}) && @ARGV == 0) {
        usage("No such aspect");
    }
}

# Act on the command line argument, if there is exactly one: config,
# autoconf and suggest are carried out and the plugin exits with 0; any
# other single argument ends in usage(). A blank first argument is
# dropped beforehand. With any other number of arguments the sub just
# returns.
#
# Stats are only populated for config: it needs them, while for autoconf
# it would generate a nasty error.
sub check_args
{
    shift @ARGV if (@ARGV && $ARGV[0] eq '');
    return unless @ARGV == 1;

    my %handler = (
        'config' => sub {
            populate_stats();
            get_config($self);
        },
        'autoconf' => sub {
            autoconf($self);
        },
        'suggest' => sub {
            suggest();
        },
    );

    my $action = $handler{$ARGV[0]};
    # usage() does not return.
    usage("Unknown argument") unless defined($action);

    $action->();
    exit 0;
}

# Braindead RPN: +,-,/,* will pop two items from the stack, and perform
# the relevant operation on the items. If the item in the array isn't one
# of the 4 basic math operations, a value from varnishstat is pushed on to
# the stack. IE: 'client_req','client_conn','/' will leave the value of
# "client_req/client_conn" on the stack.
#
# Argument: reference to the array that holds the RPN expression.
#
# The result is printed on the currently selected output handle, followed
# by a newline. A result that cannot be calculated is printed as "U",
# munin's notation for an unknown value. That is the case if
#   - a varnishstat value used in the expression does not exist,
#   - the expression divides by zero, or
#   - the expression is empty.
#
# A malformed expression (an operator with fewer than two items on the
# stack, or more than one item left at the end) is reported on STDERR and
# ends the plugin with exit status 255.
sub rpn
{
    my %operation = (
        '+' => sub { return $_[0] + $_[1]; },
        '-' => sub { return $_[0] - $_[1]; },
        '*' => sub { return $_[0] * $_[1]; },
        '/' => sub { return $_[0] / $_[1]; },
    );
    # Unknown values are kept on the stack as undef.
    my @stack;

    foreach my $item (@{$_[0]}) {
        if (!exists($operation{$item})) {
            my $stat = $varnishstat{$item};
            push(@stack, defined($stat) ? int($stat) : undef);
            next;
        }
        if (@stack < 2) {
            print STDERR "RPN error: Too few items on the stack "
                       . "for '$item'.\n";
            exit 255;
        }
        my $right = pop(@stack);
        my $left = pop(@stack);
        # Anything calculated from an unknown value is unknown, and so
        # is the result of a division by zero.
        if (!defined($left) || !defined($right)
            || ($item eq "/" && $right == 0)) {
            push(@stack, undef);
        } else {
            push(@stack, $operation{$item}->($left, $right));
        }
    }
    if (@stack > 1)
    {
        my @printable = map { defined($_) ? $_ : 'U' } @stack;
        print STDERR "RPN error: Stack has more than one item left.\n";
        print STDERR "@printable\n";
        exit 255;
    }
    my $result = defined($stack[0]) ? $stack[0] : 'U';
    print "$result\n";
}

################################
# Execution starts here        #
################################

set_aspect();
check_args();
populate_stats();

# check_args exits for config, autoconf, suggest and unknown arguments,
# so from here on the values of the aspect are fetched.
#
# Every value of the aspect is printed as "NAME.value VALUE": calculated
# by rpn() if the value has an 'rpn' definition, taken straight from
# varnishstat otherwise.
#
# NOTE: Varnish versions differ in the counters they offer, so a value is
# only printed if varnishstat actually reported it.
foreach my $name (keys %{$ASPECTS{$self}{'values'}}) {
    my $definition = $ASPECTS{$self}{'values'}{$name};

    if (defined($definition->{'rpn'})) {
        # rpn() prints the result and the newline.
        print "$name.value ";
        rpn($definition->{'rpn'});
        next;
    }

    unless (defined($varnishstat{$name})) {
        print STDERR "Error: $name not part of varnishstat.\n" if $DEBUG;
        next;
    }
    print "$name.value $varnishstat{$name}\n";
}

# end varnish_ plugin