Commit: sync
Christos Mantas committed Dec 4, 2014
1 parent b6b9402 commit 9fb544d
Showing 79 changed files with 1,408 additions and 69,727 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -37,3 +37,4 @@ nosetests.xml

*.log
*.png
*/measurements/*
255 changes: 255 additions & 0 deletions Monitor2.py
@@ -0,0 +1,255 @@
__author__ = 'cmantas'
import numpy as np
from lib.tiramola_logging import get_logger
import socket
import sys
import xml.parsers.expat
LOG_FILENAME = 'files/logs/Coordinator.log'
#import ClientsCluster as Clients
import CassandraCluster as Servers
from time import sleep
from threading import Thread
from lib.persistance_module import env_vars
import os


class GParser:

def __init__(self):
        self.inhost = 0
self.inmetric = 0
self.allmetrics = {}
self.currhostname = ""

def parse(self, ganglia_file):
"""
parses an xml ganglia file
:param ganglia_file:
:return: a dictionary of all ganlia metrics and their values
"""
p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = self.start_element
p.EndElementHandler = self.end_element
p.ParseFile(ganglia_file)

if self.allmetrics == {}: raise Exception('Host/value not found')
return self.allmetrics

def start_element(self, name, attrs):

        # here we build the nested dictionary: one entry per node, many metrics per node.
#print attrs
if name == "HOST":
#if attrs["NAME"]==self.host:
            self.allmetrics[attrs["NAME"]] = {}
            # create an empty entry: the node's name is the key, an empty dictionary object the value.
            self.inhost = 1
            self.currhostname = attrs["NAME"]
            #print "just entered the node with dns ", self.currhostname

elif self.inhost == 1 and name == "METRIC": # and attrs["NAME"]==self.metric:
#print "attrname: " , attrs["NAME"] , " attr value: " , attrs["VAL"]
self.allmetrics[self.currhostname][attrs["NAME"]] = attrs["VAL"]

def end_element(self, name):
        if name == "HOST" and self.inhost == 1:
self.inhost = 0
self.currhostname = ""


class MonitorVms:
def __init__(self, monitoring_address, monitoring_port=8649):

self.ganglia_host = monitoring_address
self.ganglia_port = monitoring_port

        ## set up the logger
        self.my_logger = get_logger("MonitorVMs", "INFO", logfile=LOG_FILENAME)

        self.allmetrics = {}
        self.parser = GParser()
        # the parser object is reused: pullMetrics() calls its .parse()
        # to refresh the metrics dictionary.
        self.pullMetrics()


def pullMetrics(self):
"""
runs periodically and refreshes the metrics
:return:
"""
#self.my_logger.debug("Refreshing metrics from %s:%s" % (self.ganglia_host, self.ganglia_port))
        self.soc = None
        for res in socket.getaddrinfo(self.ganglia_host, self.ganglia_port, socket.AF_UNSPEC, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                self.soc = socket.socket(af, socktype, proto)
            except socket.error:
                self.soc = None
                continue
try:
self.soc.connect(sa)
except socket.error as msg:
self.soc.close()
self.soc = None
continue
break
if self.soc is None:
print 'could not open socket %s:%s' % (str(self.ganglia_host), str(self.ganglia_port))
sys.exit(1)
self.allmetrics = None
f = self.soc.makefile("r")
self.allmetrics = self.parser.parse(f)
f.close()
f = None
self.soc.close()
#self.my_logger.debug("REFRESHMETRICS allmetrics: "+str(self.allmetrics))
return self.allmetrics



class YCSB_Monitor():

log = get_logger("MONITOR", "DEBUG")

def __init__(self, monitoring_endpoint, measurementsFile = env_vars["measurements_file"]):
self.monitoring_endpoint = monitoring_endpoint
self.monVms = MonitorVms(monitoring_endpoint)
self.thread = None
self.measurementsPolicy = 'centroid'
self.measurementsFile = measurementsFile
# A dictionary that will remember rewards and metrics in states previously visited
self.memory = {}
for i in range(env_vars["min_cluster_size"], env_vars["max_cluster_size"] + 1):
self.memory[str(i)] = {}
#self.memory[str(i)]['V'] = None # placeholder for rewards and metrics
self.memory[str(i)]['r'] = None
self.memory[str(i)]['arrayMeas'] = None

        # Load any previous statistics (the measurements file was set in the constructor).
self.trainingFile = env_vars["training_file"]
self.sumMetrics = {}
# initialize measurements file
meas = open(self.measurementsFile, 'a+')
if os.stat(self.measurementsFile).st_size == 0:
# The file is empty, set the headers for each column.
            meas.write('State\t\tLambda\t\tThroughput\t\tLatency\t\tCPU\t\tTime\t\tUsed\n')
meas.close()

# load training set
meas = open(self.trainingFile, 'r+')
if os.stat(self.trainingFile).st_size != 0:
# Read the training set measurements saved in the file.
meas.next() # Skip the first line with the headers of the columns
for line in meas:
# Skip comments (used in training sets)
if not line.startswith('###'):
m = line.split('\t\t')
self.add_measurement(m)
meas.close()


def add_measurement(self, metrics, write_mem=True, write_file=False):
"""
adds the measurement to either memory or file or both
@param metrics: array The metrics to store. An array containing [state, lamdba, throughput, latency, time]
@param writeFile: boolean If set write the measurement in the txt file
:return:
"""
if self.measurementsPolicy.startswith('average'):
            if metrics[0] not in self.sumMetrics:
# Save the metric with the state as key metrics = [state, inlambda, throughput, latency]
self.sumMetrics[metrics[0]] = {'inlambda': 0.0, 'throughput': 0.0, 'latency': 0.0, 'divide_by': 0}

self.sumMetrics[metrics[0]] = {'inlambda': self.sumMetrics[metrics[0]]['inlambda'] + float(metrics[1]),
'throughput': self.sumMetrics[metrics[0]]['throughput'] + float(metrics[2]),
'latency': self.sumMetrics[metrics[0]]['latency'] + float(metrics[3]),
'divide_by': self.sumMetrics[metrics[0]]['divide_by'] + 1}
if write_mem:
            # metrics-> 0: state, 1: lambda, 2: throughput, 3: latency, 4: cpu, 5: time
            if metrics[0] not in self.memory:
self.memory[str(metrics[0])] = {}
#self.memory[str(metrics[0])]['V'] = None # placeholder for rewards and metrics
self.memory[str(metrics[0])]['r'] = None
self.memory[str(metrics[0])]['arrayMeas'] = np.array([float(metrics[1]), float(metrics[2]),
float(metrics[3]), float(metrics[4])], ndmin=2)
elif self.memory[metrics[0]]['arrayMeas'] is None:
self.memory[metrics[0]]['arrayMeas'] = np.array([float(metrics[1]), float(metrics[2]),
float(metrics[3]), float(metrics[4])], ndmin=2)
else:
self.memory[metrics[0]]['arrayMeas'] = np.append(self.memory[metrics[0]]['arrayMeas'],
[[float(metrics[1]), float(metrics[2]),
float(metrics[3]), float(metrics[4])]], axis=0)
        # TODO: add one zero measurement per state for the no-load case? too many 0s may skew the centroids
if write_file:
if write_mem:
used = "Yes"
else:
used = "No"
ms = open(self.measurementsFile, 'a')
# metrics[5] contains the time tick -- when running a simulation, it represents the current minute,
# on actual experiments, it is the current time. Used for debugging and plotting
ms.write(str(metrics[0]) + '\t\t' + str(metrics[1]) + '\t\t' + str(metrics[2]) + '\t\t' +
str(metrics[3]) + '\t\t' + str(metrics[4]) + '\t\t' + str(metrics[5]) + '\t\t'+ used+'\n')
ms.close()


def get_metrics(self):
inmetrics = self.monVms.pullMetrics()
out_metrics = {}


        # initialise the aggregates up front so the summations below never hit a missing key at start-up
        out_metrics['inlambda'] = 0
        out_metrics['throughput'] = 0
        out_metrics['latency'] = 0
        out_metrics['cpu'] = 0
        ## Aggregation of YCSB client metrics
clients = 0
# We used to collect server cpu too, do we need it?
#self.log.debug("TAKEDECISION state: %d, pending action: %s. Collecting metrics" % (self.currentState, str(self.pending_action)))
for host in inmetrics.keys():
metric = inmetrics[host]
if isinstance(metric, dict):
for key in metric.keys():
if key.startswith('ycsb_TARGET'):
out_metrics['inlambda'] += float(metric[key])
elif key.startswith('ycsb_THROUGHPUT'):
out_metrics['throughput'] += float(metric[key])
elif key.startswith('ycsb_READ') or key.startswith('ycsb_UPDATE') or key.startswith(
'ycsb_RMW') or key.startswith('ycsb_INSERT'):
out_metrics['latency'] += float(metric[key])
elif key.startswith('cpu_user'):
out_metrics['cpu'] += float(metric[key])
#self.log.debug("Latency : "+ str(host[key]) +" collected from client: "+ str(key)
# +" latency so far: "+ str(inmetrics['latency']))
                    if float(metric[key]) > 0:
                        clients += 1
try:
out_metrics['latency'] = out_metrics['latency'] / clients
#self.my_logger.debug("Final latency for this measurement: "+ str(allmetrics['latency']) +" by "
# + str(clients)+ " clients!")
        except ZeroDivisionError:
            out_metrics['latency'] = 0

return out_metrics




if __name__ == "__main__":
print "hello"

monitor = YCSB_Monitor("83.212.118.57", measurementsFile="dummy.log")

while True:
print monitor.get_metrics()
sleep(3)
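
For reference, a minimal sketch of what GParser consumes and produces: the XML fragment below is hand-written in the shape a gmond endpoint emits (the host and metric names are invented), and it assumes Monitor2's module-level imports resolve on your machine.

from io import BytesIO
from Monitor2 import GParser

# a tiny, well-formed sample in the gmond XML shape
sample = BytesIO(b"""<GANGLIA_XML>
 <CLUSTER NAME="test">
  <HOST NAME="client-1">
   <METRIC NAME="ycsb_THROUGHPUT" VAL="1200.5"/>
   <METRIC NAME="cpu_user" VAL="42.0"/>
  </HOST>
 </CLUSTER>
</GANGLIA_XML>""")

parser = GParser()
# parse() expects a file-like object, as returned by socket.makefile()
print(parser.parse(sample))
# -> {'client-1': {'ycsb_THROUGHPUT': '1200.5', 'cpu_user': '42.0'}}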
83 changes: 16 additions & 67 deletions Monitoring.py
@@ -86,6 +86,7 @@ def refreshMetrics(self):
self.soc = None
attempts = 0
while self.soc is None and attempts < 3:
attempts += 1
try:
self.soc = socket.socket(af, socktype, proto)
except socket.error as msg:
@@ -97,15 +98,20 @@ def refreshMetrics(self):
except socket.error as msg:
self.soc.close()
self.soc = None
sleep(1)

sleep(10)
self.my_logger.error("Failed to connect to ganglia endpoint: " + str(self.ganglia_host) + " "+str(msg) + " attempt " + str(attempts) )
continue
break
if self.soc is None:
print 'could not open socket %s:%s' % (str(self.ganglia_host), str(self.ganglia_port))
raise Exception('could not open socket %s:%s (%s)' % (str(self.ganglia_host), str(self.ganglia_port), str(msg)))
sys.exit(1)
self.allmetrics = None
f = self.soc.makefile("r")
self.allmetrics = self.parser.parse(f)
try:
self.allmetrics = self.parser.parse(f)
except:
self.my_logger.error("Failed to parse xml from ganglia")
f.close()
f = None
self.soc.close()
@@ -122,75 +128,18 @@ def usage():
-c|--critical= [-s|--server=] [-p|--port=] """
sys.exit(3)




if __name__ == "__main__":
##############################################################
# ganglia_host = 'clusterhead'
# ganglia_port = 8649
# host = 'clusterhead'
# metric = 'swap_free'
# warning = None
# critical = None


# try:
# options, args = getopt.getopt(sys.argv[1:],
# "h:m:w:c:s:p:",
# ["host=", "metric=", "warning=", "critical=", "server=", "port="],
# )
# except getopt.GetoptError, err:
# print "check_gmond:", str(err)
# usage()
# sys.exit(3)
#
# for o, a in options:
# if o in ("-h", "--host"):
# host = a
# elif o in ("-m", "--metric"):
# metric = a
# elif o in ("-w", "--warning"):
# warning = float(a)
# elif o in ("-c", "--critical"):
# critical = float(a)
# elif o in ("-p", "--port"):
# ganglia_port = int(a)
# elif o in ("-s", "--server"):
# ganglia_host = a
#
# if critical == None or warning == None or metric == None or host == None:
# usage()
#
ganglia_host = '83.212.118.57'
ganglia_port = 8649

#monVms = MonitorVms(cluster_manager.get_hosts())
monVms = MonitorVms( "83.212.121.246")
monVms = MonitorVms(ganglia_host, ganglia_port)


sleep(5)
allmetrics=monVms.refreshMetrics()
print "allmetrics length ", len(allmetrics)


# try:
# print "ganglia host " + ganglia_host
# print 'host ' + host
# s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# s.connect((ganglia_host,ganglia_port))
## file = s.makefile("r")
## print file.read()
# parser = GParser(host, metric)
## print "outside function" , s.makefile("r")
# value = parser.parse(s.makefile("r"))
#
# s.close()
# except Exception, err:
# print "CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err)
# sys.exit(3)
#
# if value >= critical:
# print "CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value)
# sys.exit(2)
# elif value >= warning:
# print "CHECKGANGLIA WARNING: %s is %.2f" % (metric, value)
# sys.exit(1)
# else:
# print "CHECKGANGLIA OK: %s is %.2f" % (metric, value)
# sys.exit(0)
print allmetrics.keys()
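
The connect-with-retries loop this commit adds to refreshMetrics() is a common pattern; below is a standalone sketch of it, where the function name, attempt limit, and back-off value are illustrative rather than taken from the repository.

import socket
from time import sleep

def connect_with_retries(host, port, max_attempts=3, backoff=10):
    """Try every resolved address up to max_attempts times; return a connected socket or None."""
    for af, socktype, proto, canonname, sa in socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM):
        attempts = 0
        while attempts < max_attempts:
            attempts += 1
            try:
                soc = socket.socket(af, socktype, proto)
                soc.connect(sa)
                return soc
            except socket.error:
                sleep(backoff)  # back off before retrying, like the sleep(10) the commit adds
    return None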
2 changes: 1 addition & 1 deletion VM.py
@@ -227,7 +227,7 @@ def wait_ready(self):
while not self.created: sleep(3)
self.log.debug("Waiting for SSH daemon (%s)" % self.get_public_addr())
#time to stop trying
end_time = datetime.now()+timedelta(seconds=ssh_giveup_timeout)
end_time = datetime.now()+timedelta(seconds=env_vars['ssh_giveup_timeout'])
self.log.debug("end time:"+str(end_time))
timer = Timer()
timer.start()
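
The changed line computes a wall-clock deadline from a configurable timeout instead of a module constant. A generic sketch of that deadline pattern follows; the helper name and polling interval are illustrative, and only the env_vars lookup mirrors the commit.

from datetime import datetime, timedelta
from time import sleep

def wait_until(predicate, timeout_seconds, poll_interval=3):
    """Poll predicate() until it returns True or the deadline passes."""
    end_time = datetime.now() + timedelta(seconds=timeout_seconds)
    while datetime.now() < end_time:
        if predicate():
            return True
        sleep(poll_interval)
    return False

# e.g. wait_until(ssh_is_up, env_vars['ssh_giveup_timeout'])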