#!/usr/bin/perl
#
# Author:   Digimer (digimer@alteeve.ca)
#           Alteeve's Niche! - https://alteeve.ca/w/
# Date:     2015-09-08
# Version:  0.2.9
# License:  GPL v2+
#
# This program ties LINBIT's DRBD into Red Hat's RHCS's fence daemon via the 'fence_node' shell call.
#
# This script was modeled heavily on Lon Hohberger <lhh[a]redhat.com>'s obliterate-peer.sh script.
#
# Exit Codes (as per; http://osdir.com/ml/linux.kernel.drbd.devel/2006-11/msg00005.html)
# - 3 -> peer is inconsistent
# - 4 -> peer is outdated (this handler outdated it) [ resource fencing ]
# - 5 -> peer was down / unreachable
# - 6 -> peer is primary
# - 7 -> peer got stonithed [ node fencing ]
# This program uses;
# - 1   = Something failed
# - 7   = Fence succeeded
# - 255 = End of program hit... should never happen.
#
# Features
# - Clusters > 2 nodes supported, provided


use strict;
use warnings;
use IO::Handle;
use Data::Dumper;
use Sys::Syslog qw(:standard :macros);

my $THIS_FILE = "rhcs_fence";

my $conf = {
	# If a program isn't at the defined path, $ENV{PATH} will be searched.
	path	=>	{
		cman_tool		=>	"/usr/sbin/cman_tool",
		fence_node		=>	"/usr/sbin/fence_node",
		uname			=>	"/bin/uname",
		logger			=>	"/bin/logger",
	},
	# General settings.
	sys	=>	{
		# Set 'debug' to '1' for DEBUG output.
		debug			=>	0,
		# Set 'local_delay' to the number of seconds to wait before
		# fencing the other node. If left to '0', Node with ID = 1 will
		# have no delay and all other nodes will wait:
		# ((node ID * 2) + 5) seconds.
		local_delay		=>	0,
		# Local host name.
		host_name		=>	"",
		log_opened		=>	0,
	},
	# The script will set this.
	cluster	=>	{
		this_node		=>	"",
	},
	# These are the environment variables set by DRBD. See 'man drbd.conf'
	# -> 'handlers'.
	env	=>	{
		# The resource triggering the fence.
		'DRBD_RESOURCE'		=>	$ENV{DRBD_RESOURCE},
		# The resource minor number, or, in the case of volumes,
		# numbers.
		'DRBD_MINOR'		=>	$ENV{DRBD_MINOR},
		# The peer(s) hostname(s), space separated.
		'DRBD_PEERS'		=>	$ENV{DRBD_PEERS},
	},
};

# Find executables.
find_executables($conf);

# Something for the logs
to_log(LOG_INFO(), $conf, __LINE__, "Attempting to fence peer using RHCS from DRBD...");

# Record the environment variables
foreach my $key (keys %{$conf->{env}})
{
	if (not defined $conf->{env}{$key}) { $conf->{env}{$key} = ""; }
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Environment variable: [$key] == [$conf->{env}{$key}]");
}

# Who am I?
get_local_node_name($conf);

# Am I up to date?
get_local_resource_state($conf);

# Who else is here?
get_info_on_other_nodes($conf);

# Who shall I kill?
get_target($conf);

# Sleep a bit to avoid a double-fence.
sleep_a_bit($conf);

# Eject the target, if I can.
eject_target($conf);

# In case cman decided by itself to fence the node...
eventually_wait_for_fenced($conf); # May exit with exit code 7

# Only kill the target if gracefull eject did not work. This is
# important because fence_node does not want to be invoked
# in multiple instances in parallel.
kill_target($conf);

exit(255);

###############################################################################
# Functions                                                                   #
###############################################################################

# This checks the given paths and, if something isn't found, it searches PATH
# trying to find it.
sub find_executables
{
	my ($conf) = @_;

	# Variables.
	my $check = "";
	my $bad   = 0;

	# Log entries can only happen if I've found 'logger', so an extra check
	# will be made on 'to_log' calls.
	my @dirs = split/:/, $ENV{PATH};
	foreach my $exe (sort {$b cmp $a} keys %{$conf->{path}})
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Checking if: [$exe] is at: [$conf->{path}{$exe}]");
		if ( not -e $conf->{path}{$exe} )
		{
			to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: It is not!");
			foreach my $path (@dirs)
			{
				$check =  "$path/$exe";
				$check =~ s/\/\//\//g;
				to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Checking: [$check]");
				if ( -e $check )
				{
					to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Found!");
					if (-e $conf->{path}{logger})
					{
						to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Changed path for: [$exe] from: [$conf->{path}{$exe}] to: [$check]");
					}
					else
					{
						warn "DEBUG: Changed path for: [$exe] from: [$conf->{path}{$exe}] to: [$check]\n";
					}
					$conf->{path}{$exe} = $check;
					to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Set 'path::$exe' to: [$conf->{path}{$exe}]");
				}
				else
				{
					to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Not found!");
				}
			}
		}
		else
		{
			to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Found!");
			next;
		}

		# Make sure it exists now.
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Checking again if: [$exe] is at: [$conf->{path}{$exe}].");
		if ( not -e $conf->{path}{$exe} )
		{
			$bad = 1;
			if (-e $conf->{path}{logger})
			{
				to_log(LOG_DEBUG(), $conf, __LINE__, "Failed to find executable: [$exe]. Unable to proceed.");
			}
			else
			{
				warn "Failed to find executable: [$exe]. Unable to proceed.\n";
			}
		}
	}
	if ($bad)
	{
		nice_exit($conf, 1);
	}

	return(0);
}

# This is an artificial delay to help avoid a double-fence situation when both
# nodes are alive, but comms failed.
sub sleep_a_bit
{
	my ($conf) = @_;

	# Variables
	my $i_am  = $conf->{sys}{this_node};
	my $my_id = $conf->{nodes}{$i_am}{id};
	my $delay = $my_id;

	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: I am: [$i_am] and my id is: [$my_id]");

	# I don't want to fail because I don't have an ID number.
	if (not $my_id)
	{
		$my_id = int(rand(10));
	}

	# Calculate the delay.
	$delay = (($my_id * 2) + 5);

	# But never wait more than 30 seconds.
	if ($delay > 30)
	{
		$delay = 30;
	}

	# A user value trumps all.
	if ($conf->{sys}{local_delay})
	{
		$delay = $conf->{sys}{local_delay};
	}

	# Don't wait if this is node ID 1 unless the user has defined a delay.
	if (($my_id > 1) or ($conf->{sys}{local_delay}))
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "Delaying for: [$delay] seconds to avoid dual-fencing...");
		sleep $delay;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Right then, break over.");
	}
	else
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "I am the first node, so I won't delay.");
	}

	return(0);
}

# This kills remote node.
sub kill_target
{
	my ($conf) = @_;

	# Variables
	my $remote_node = $conf->{env}{DRBD_PEERS};
	my $sc          = "";
	my $shell_call  = "";
	my $line        = "";
	my $sc_exit     = "";

	# Hug it and squeeze it and call it George.
	to_log(LOG_DEBUG(), $conf, __LINE__, "Fencing target: [$remote_node]...");

	$sc         = IO::Handle->new();
	$shell_call = "$conf->{path}{fence_node} -v $remote_node";
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]");
	open ($sc, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$sc], error was: $!");
	while(<$sc>)
	{
		chomp;
		$line = $_;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: $line");
		if ($line=~/fence .*? success/)
		{
			to_log(LOG_DEBUG(), $conf, __LINE__, "'fence_node $remote_node' appears to have succeeded!");
		}
		else
		{
			to_log(LOG_DEBUG(), $conf, __LINE__, "'fence_node $remote_node' appears to have failed!");
			to_log(LOG_DEBUG(), $conf, __LINE__, "Read: [$line]");
		}
	}
	$sc->close();
	$sc_exit = $?;
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to fence node: [$remote_node] exited with: [$sc_exit]");

	# Exit.
	if ($sc_exit)
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "Attempt to fence: [$remote_node] failed!");
		nice_exit($conf, 1);
	}
	else
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "Fencing of: [$remote_node] succeeded!");
		nice_exit($conf, 7);
	}

	# This should not be reachable.
	return(0);
}

# This ejects the remote node from the cluster, if cluster comms are still up.
sub eject_target
{
	my ($conf) = @_;

	# Variables;
	my $remote_node = "";
	my $sc          = "";
	my $sc_exit     = "";
	my $shell_call  = "";
	my $line        = "";

	### I don't know if I really want to use/keep this.
	# If the node is still a cluster member, kick it out.
	$remote_node = $conf->{env}{DRBD_PEERS};
	if ($conf->{nodes}{$remote_node}{member} eq "M")
	{
		# It is, kick it out. If cluster comms are up, this will
		# trigger a fence in a few moment, regardless of what we do
		# next.
		to_log(LOG_DEBUG(), $conf, __LINE__, "Target node: [$remote_node] is a cluster member, attempting to eject.");
		$sc         = IO::Handle->new();
		$shell_call = "$conf->{path}{cman_tool} kill -n $remote_node";
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]");
		open ($sc, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$sc], error was: $!");
		while(<$sc>)
		{
			chomp;
			$line = $_;
			to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: line: [$line]");
		}
		$sc->close();
		$sc_exit = $?;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to force-remove node: [$remote_node] exited with: [$sc_exit]");

		return 1;
	}
	else
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "Target node: [$remote_node] is *not* a cluster member (state: [$conf->{nodes}{$remote_node}{member}]). Not ejecting.");

		return 0;
	}
}

# This identifies the remote node.
sub get_target
{
	my ($conf) = @_;

	# Variables
	my $remote_node = $conf->{env}{DRBD_PEERS};

	# Make sure I know my target.
	if ( not exists $conf->{nodes}{$remote_node} )
	{
		# Try the short name.
		$remote_node =~ s/^(.*?)\..*$//;
		if ( not exists $conf->{nodes}{$remote_node} )
		{
			to_log(LOG_DEBUG(), $conf, __LINE__, "I didn't see the other node: [$conf->{env}{DRBD_PEERS} ($remote_node)] in cman's node list. I can't fence this node.");
			print "I didn't see the other node: [$conf->{env}{DRBD_PEERS} ($remote_node)] in cman's node list. I can't fence this node.\n";
			print "Does the hostname in cluster.conf match the host names used in the DRBD resource files?\n";
			nice_exit($conf, 1);
		}
		# Update the peer.
		$conf->{env}{DRBD_PEERS} = $remote_node;
	}
	to_log(LOG_DEBUG(), $conf, __LINE__, "I have identified my target: [$remote_node]");

	return(0);
}

# This uses 'cman_tool' to get the information on the other node(s) in the
# cluster.
sub get_info_on_other_nodes
{
	my ($conf) = @_;

	# Variables
	my $node_count = 0;
	my $sc         = "";
	my $shell_call = "";
	my $sc_exit    = "";
	my $line       = "";
	my $node_id    = "";
	my $node_name  = "";
	my $member     = "";
	my $address    = "";

	$sc         = IO::Handle->new();
	$shell_call = "$conf->{path}{cman_tool} -a -F id,name,type,addr nodes";
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]");
	open ($sc, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$sc], error was: $!");
	while(<$sc>)
	{
		chomp;
		$line = $_;
		($node_id, $node_name, $member, $address) = (split/ /, $line);
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: id: [$node_id], name: [$node_name], member: [$member], address: [$address]");

		$conf->{nodes}{$node_name}{member}  = $member;
		$conf->{nodes}{$node_name}{id}      = $node_id;
		$conf->{nodes}{$node_name}{address} = $address;
		$node_count++;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: output: $line");
	}
	$sc->close();
	$sc_exit=$?;
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to gather cluster member information exited with: [$sc_exit]");

	return(0);
}

# This reads /proc/drbd and pulls out the state of the defined resource
sub get_local_resource_state
{
	my ($conf) = @_;

	# Variables
	my $minor      = $conf->{env}{DRBD_MINOR};
	my $sc         = "";
	my $shell_call = "";
	my $sc_exit    = "";
	my $line       = "";
	my $state      = "";

	# Minor may well be '0', so I need to check for an empty string here.
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Checking the state of resource with minor number: [$conf->{env}{DRBD_MINOR}]");
	if ($conf->{env}{DRBD_MINOR} eq "")
	{
		to_log(LOG_ERR(), $conf, __LINE__, "Resource minor number not defined! Unable to proceed.");
	}

	# minor can be a space delimited list. Assume it's always a list and iterate over it
	my @minor = split / /, $minor;

	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: minor: [$minor]");
	$sc         = IO::Handle->new();
	$shell_call = "</proc/drbd";
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]");
	open ($sc, "$shell_call") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$sc], error was: $!");
	while(<$sc>)
	{
		chomp;
		$line = $_;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: line: [$line]");
		$line =~ s/^\s+//;
		foreach $minor (@minor)
		{
			if ($line =~ /^$minor: .*? ds:(.*?)\//)
			{
				$state = $1;
				if ($conf->{sys}{debug})
				{
					to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: read state of minor: [$minor] as: [$state]");
				}
				$conf->{sys}{local_res_uptodate} = $state eq "UpToDate" ? 1 : 0;
				to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: sys::local_res_uptodate: [$conf->{sys}{local_res_uptodate}]");
			}
		}
		# If one or more minors out not UpToDate bail
		last if ($conf->{sys}{local_res_uptodate} eq 0);
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: read state of minor: [$minor] as: [$state]");
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: sys::local_res_uptodate: [$conf->{sys}{local_res_uptodate}]");
	}
	$sc->close();
	$sc_exit = $?;
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to collect UpToDate information device with minor: [@minor] exited with: [$sc_exit]");

	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: UpToDate: [$conf->{sys}{local_res_uptodate}]");
	if (not $conf->{sys}{local_res_uptodate})
	{
		to_log(LOG_ERR(), $conf, __LINE__, "Local resource: [$conf->{env}{DRBD_RESOURCE}], minor: [@minor] is NOT 'UpToDate', will not fence peer.");
	}

	return(0);
}

# This reads in and sets the local node's name.
sub get_local_node_name
{
	my ($conf) = @_;

	# Variables
	my $sc         = "";
	my $shell_call = "";
	my $sc_exit    = "";
	my $line       = "";

	$sc         = IO::Handle->new();
	$shell_call = "$conf->{path}{cman_tool} status";
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: shell call: [$shell_call]");
	open ($sc, "$shell_call 2>&1 |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: [$sc], error was: $!");
	while(<$sc>)
	{
		chomp;
		$line = $_;
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: line: [$line]");
		if ($line =~ /Node name: (.*)/)
		{
			$conf->{sys}{this_node} = $1;
			last;
		}
	}
	$sc->close();
	$sc_exit = $?;
	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Attempt to get local node name via 'cman_tool status' exited with: [$sc_exit]");

	to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: I am: [$conf->{sys}{this_node}]");
	if (not $conf->{sys}{this_node})
	{
		to_log(LOG_ERR(), $conf, __LINE__, "Unable to find local node name.");
	}

	return(0);
}

# Log file entries
sub to_log
{
	my ($level, $conf, $line_num, $message)=@_;

	# I want the line number in DEBUG mode.
	if ($conf->{sys}{debug}) 
	{
		$message = "$line_num; $message";
		$level   = LOG_INFO() if ($level == LOG_DEBUG());
	}
	if (not $conf->{sys}{log_opened})
	{
		openlog($THIS_FILE, "", LOG_DAEMON());
		$conf->{sys}{log_opened} = 1;
	}

	# Setup the time and then the string.
	syslog($level, $message);

	if (grep($level == $_, LOG_ERR(), LOG_ALERT, LOG_CRIT, LOG_EMERG))
	{
		nice_exit($conf, 1);
	}

	return(0);
}

# Cleanly exit.
sub nice_exit
{
	my ($conf, $code)=@_;
	
	# This used to handle closing open file handles.

	exit ($code);
}

sub get_fenced_state
{
	my ($conf) = @_;

	my $sc = IO::Handle->new();
	open($sc, "fence_tool -n ls |") or to_log(LOG_ERR(), $conf, __LINE__, "Failed to call: fence_tools  error was $!\n");
	
	my %fence_state;
	<$sc> =~ /fence domain/;
	$fence_state{member_count}  = $1 if (<$sc> =~ /member count\s+([0-9]+)/);
	$fence_state{victim_count}  = $1 if (<$sc> =~ /victim count\s+([0-9]+)/);
	$fence_state{victim_now}    = $1 if (<$sc> =~ /victim now\s+([0-9]+)/);
	$fence_state{master_nodeid} = $1 if (<$sc> =~ /master nodeid\s+([0-9]+)/);
	$fence_state{wait_state}    = $1 if (<$sc> =~ /wait state\s+(\w+)$/);
	$sc->close();

	return \%fence_state;
}

sub wait_for_fenced_status
{
	my ($conf, $target_state, $time_seconds)=@_;
	my $fenced_state;

	while ($time_seconds)
	{
		$fenced_state = get_fenced_state($conf);
		if ($fenced_state->{wait_state} eq $target_state)
		{
			return $fenced_state;
		}
		sleep(1);
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Waiting for $target_state. Now $fenced_state->{wait_state}\n");
		$time_seconds--;
	}

	return $fenced_state;
}

sub eventually_wait_for_fenced
{
	my ($conf) = @_;

	my $fenced_state1 = wait_for_fenced_status($conf, "fencing", 30);

	if ($fenced_state1->{wait_state} ne "fencing") 
	{
		to_log(LOG_DEBUG(), $conf, __LINE__, "DEBUG: Expected fencd to do a fence action, got $fenced_state1->{wait_state}\n");
		return;
	}

	my $to_fence_node_id = $conf->{nodes}{$conf->{env}{DRBD_PEERS}}{id};
	if ($fenced_state1->{victim_now} != $to_fence_node_id)
	{
		to_log(LOG_ERR(), $conf, __LINE__, "Fenced is shooting at $fenced_state1->{victim_now}; Should shoot at $to_fence_node_id\n");
	}

	my $fenced_state2 = wait_for_fenced_status($conf, "none", 240);
	if ($fenced_state2->{wait_state} eq "none")
	{
		to_log(LOG_INFO(), $conf, __LINE__, "Seems fenced was successfull\n");
		exit 7;
	}
	else
	{
		to_log(LOG_ERR(), $conf, __LINE__, "Fenced failed" . Dumper($fenced_state1) . Dumper($fenced_state2));
	}
}