#!/usr/bin/env ruby
#
# Watchdog script: every five minutes, connect to a fixed range of TCP ports
# on each host in HOSTS. A port that accepts the connection but does not
# answer within the read timeout is treated as hung, and the process
# listening on it is killed over SSH.
#
# Set the DEBUG environment variable to also log ports that respond normally.

require 'socket'
require 'timeout'
require 'rubygems'
require 'net/ssh'

# Hosts whose listeners are probed on every pass of the main loop.
HOSTS = %w[
  10.10.1.10
  10.10.1.11
  10.10.1.12
  10.10.1.13
].freeze

# Probes the listener ports of a single host and restarts hung listeners.
#
# Note that constructing an instance performs the whole probe immediately;
# callers only need `WatchListener.new(host)`.
class WatchListener
  # Inclusive range of TCP ports probed on each host.
  PORTS = (7000..7006).freeze

  # Seconds to wait for a reply line before declaring a listener hung.
  READ_TIMEOUT = 15

  # Seconds to wait for the output of the remote kill command.
  SSH_READ_TIMEOUT = 20

  attr_accessor :host

  # @param host [String] address of the machine to probe
  def initialize(host)
    self.host = host
    probe_ports
  end

  # Probes every port in PORTS on the host, one after another.
  def probe_ports
    PORTS.each { |port| probe_port(port) }
  end

  # Kills whatever process is listening on +port+ on the remote host.
  #
  # @param port [Integer] TCP port whose listening process should be killed
  def restart_listener_on(port)
    # Match ":<port> " rather than the bare number so that other ports or
    # PIDs that merely contain the same digits (e.g. 17000) are not selected
    # and killed by mistake.
    exec %{
      PID=`netstat -a -n -p | grep ':#{port} ' | grep LISTEN | awk '{print $7}' | cut -d'/' -f1`
      echo "killing $PID" && kill -9 $PID
    }
  end

  # Runs +command+ on the host over SSH and logs its output.
  #
  # This intentionally keeps the original method name even though it shadows
  # Kernel#exec inside this class.
  #
  # NOTE(review): `session.process.popen3` and `Net::SSH.start(host)` without
  # a user name look like the Net::SSH 1.x API - confirm the installed gem
  # version before upgrading.
  #
  # @param command [String] shell command to run remotely
  def exec(command)
    log "connecting.."
    Net::SSH.start(host) do |session|
      input, output, _error = session.process.popen3(command)
      begin
        Timeout.timeout(SSH_READ_TIMEOUT) { log output.read }
      rescue Timeout::Error
        # Best effort: the kill may still have succeeded, so carry on.
        log "timed out reading remote command output"
      rescue StandardError => e
        # Best effort: report the failure instead of silently discarding it.
        log "error reading remote command output: #{e.message}"
      end
      input.puts "quit"
    end
    log "done"
  end

  # Prints +text+ prefixed with the current time and the host address.
  #
  # @param text [String] message to print
  def log(text)
    puts "[%s] (%s) %s" % [Time.now.strftime("%H:%M:%S"), @host, text]
  end

  private

  # Connects to a single port, sends dummy input and waits for one reply
  # line. A refused connection is only logged; a missing reply triggers a
  # restart of the listener.
  #
  # @param port [Integer] TCP port to probe
  def probe_port(port)
    socket = TCPSocket.new(@host, port)
    socket.puts "dummytext\n\ndummytext"
    begin
      Timeout.timeout(READ_TIMEOUT) { socket.gets }
    rescue Timeout::Error
      log "%d IS HUNG! RESTARTING..." % port
      restart_listener_on port
    else
      log "%d working fine" % port if ENV['DEBUG']
    end
  rescue Errno::ECONNREFUSED
    log "%d refuses connection" % port
  ensure
    # Always release the descriptor; this script runs indefinitely.
    socket.close if socket
  end
end

# Main loop
loop do
  HOSTS.each do |host|
    begin
      WatchListener.new host
    rescue StandardError => error
      # One failing host must not stop the others from being checked.
      puts "Exception raised: #{error}"
    end
  end

  # Count down the 300 second pause in five 60-second steps.
  print "Sleeping 300 seconds: "
  5.times do |i|
    print "%d .. " % (300 - i * 60)
    STDOUT.flush
    sleep 60
  end
  puts
end