import datetime
import logging

try:
    from systemd.journal import JournalHandler
except ImportError:
    # python-systemd is not installed: keep the module importable and let
    # the application's own logging configuration decide where records go.
    JournalHandler = None

# Logging
log = logging.getLogger(__name__)
if JournalHandler is not None:
    log.addHandler(JournalHandler())


class HostData:
    """
    State remembered about one host and its services.

    Attributes:
        name: host name, as given in the notification.
        type: type of the last host-level notification ("" until one is seen).
        status: status of the last host-level notification ("OK" initially).
        downtime: True between an announced DOWNTIMESTART and DOWNTIMEEND.
        statuses: last known status of each service, keyed by service name.
        types: last notification type of each service, keyed by service name.
        counts: number of services currently in each status.
        maintainer: name prepended to every message sent about this host.
    """

    def __init__(self, name):
        self.name = name
        # Concerning host
        self.type = ""
        self.status = "OK"
        self.downtime = False
        # Concerning services
        self.statuses = {}
        self.types = {}
        # Tools
        self.counts = {"CRITICAL": 0, "WARNING": 0, "OK": 0}
        self.maintainer = "Tout le monde"

    def __repr__(self):
        # Makes the "known hosts" log line readable instead of showing
        # bare object addresses.
        return "%s(name=%r, status=%r, type=%r, downtime=%r, counts=%r)" % (
            type(self).__name__, self.name, self.status, self.type,
            self.downtime, self.counts)


class DataStore:
    """
    Keeps per-host notification state and forwards summaries to a bot.

    Attributes:
        knownHosts: HostData objects keyed by host name.
        knownMaintainers: notification history, as
            {maintainer: {host: [raw notification text, ...]}}.
        linkedBot: object whose ``push(destination, message)`` method is
            called for every message to send.
    """

    def __init__(self, linkedBot):
        log.info("Created DataStore")
        self.knownHosts = {}
        self.knownMaintainers = {}
        self.linkedBot = linkedBot

    def push(self, msg):
        """
        Process one notification and send the resulting message, if any.

        Messages look like
        DESTMUC|TYPE|HOST/SERVICE|STATE|OUTPUT|SENDER|COMMENT
        where "/SERVICE" is omitted for host-level notifications. DESTMUC
        is passed unchanged to ``linkedBot.push`` as the destination.

        Raises:
            ValueError: if msg does not contain exactly seven
                "|"-separated fields.
        """
        curtime = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

        destmuc, ntype, location, status, text, sender, comment = \
            msg.split("|")

        # Only the first "/" separates host from service, so a service
        # name that itself contains "/" is kept whole.
        host, _, service = location.partition("/")

        # Raw text kept in the history
        raw = "%s [%s/%s]: %s (%s %s)" % (
            curtime, ntype, status, text, sender, comment)

        log.info("Datastore received: %s", msg)
        log.info("Datastore understood: %s", raw)

        cur = self._update_state(host, service, ntype, status)
        self._record_history(cur.maintainer, host, raw)

        # Number of services of this host that are not in an OK status
        problem_count = sum(
            count for name, count in cur.counts.items() if "OK" not in name)

        if "OK" not in status:
            message = self._on_problem(
                cur, host, service, status, text, problem_count)
        else:
            message = self._on_recovery(
                cur, host, service, text, problem_count)

        if message is not None:
            log.info("Sending to %s: %s", destmuc, message)
            self.linkedBot.push(destmuc, cur.maintainer + ", " + message)

        log.info("Datastore known hosts: %s", self.knownHosts)
        log.info("Datastore known maintainers: %s", self.knownMaintainers)

    def _update_state(self, host, service, ntype, status):
        """Record the notification in the host's state and return the host."""
        cur = self.knownHosts.get(host)
        if cur is None:
            cur = self.knownHosts[host] = HostData(host)

        if not service:
            # Host-level notification. The type is refreshed even when the
            # status is unchanged, otherwise a DOWNTIMEEND that follows a
            # DOWNTIMESTART with the same status would never be noticed.
            cur.status = status
            cur.type = ntype
            return cur

        previous = cur.statuses.get(service)
        if previous != status:
            if previous is not None:
                # The service leaves its previous status bucket
                cur.counts[previous] = cur.counts.get(previous, 0) - 1
            cur.counts[status] = cur.counts.get(status, 0) + 1
            cur.statuses[service] = status
        cur.types[service] = ntype
        return cur

    def _record_history(self, maintainer, host, raw):
        """Append the raw notification text to the maintainer's history."""
        hosts = self.knownMaintainers.setdefault(maintainer, {})
        hosts.setdefault(host, []).append(raw)

    @staticmethod
    def _recap(counts, problems_only=False):
        """Return "N STATUS(s), ..." for counts, optionally skipping OK ones."""
        return ", ".join(
            "%s %s(s)" % (count, name)
            for name, count in counts.items()
            if not (problems_only and "OK" in name))

    def _on_problem(self, cur, host, service, status, text, problem_count):
        """Return the message for a non-OK notification, or None to stay quiet."""
        if "UNKNOWN" in cur.type:
            # An unknown host state is announced only while no service
            # problem is counted; otherwise nothing is sent.
            if problem_count == 0:
                return "état inconnu sur %s (%s)" % (host, text)
            return None

        # General problem
        if "OK" not in cur.status and problem_count == 0:
            return "je détecte un problème général (%s) sur %s (%s)" % (
                status, host, text)

        # Only one service has a problem
        if service and problem_count == 1:
            return ("je détecte un problème (%s) sur le service %s de"
                    " la machine %s (%s)" % (status, service, host, text))

        # Multiple problems: recap of the statuses that are not OK
        return ("je détecte de multiples problèmes sur la machine %s\n"
                % host) + self._recap(cur.counts, problems_only=True)

    def _on_recovery(self, cur, host, service, text, problem_count):
        """
        Return the message for an OK notification, or None to stay quiet.

        Also toggles ``cur.downtime`` when a downtime start or end is
        announced.
        """
        if "DOWNTIMESTART" in cur.type and not cur.downtime:
            cur.downtime = True
            return "début de DOWNTIME sur %s" % host

        if "DOWNTIMEEND" in cur.type and cur.downtime:
            cur.downtime = False
            return "fin de DOWNTIME sur %s" % host

        # Ignore unregistered downtimes
        if "DOWNTIME" in cur.type:
            return None

        if problem_count == 0:
            if not service:
                # End of a general (host-level) problem
                return "fin d'alerte générale sur %s (%s)" % (host, text)
            # The last failing service is back
            return ("résolution du problème sur le service %s de"
                    " la machine %s (%s)\n" % (service, host, text)
                    ) + self._recap(cur.counts)

        # Resolution, but other problems remain
        return ("résolution d'alertes en cours sur la machine %s\n"
                % host) + self._recap(cur.counts)