2022-02-08 15:46:34 +01:00
|
|
|
import datetime
|
|
|
|
import logging
|
|
|
|
|
|
|
|
from systemd.journal import JournalHandler
|
|
|
|
|
|
|
|
|
|
|
|
# Logging: records emitted through this module's logger are handed to a
# systemd journal handler.
log = logging.getLogger(__name__)
_journal_handler = JournalHandler()
log.addHandler(_journal_handler)
|
2022-02-08 20:25:51 +01:00
|
|
|
|
2022-02-08 15:46:34 +01:00
|
|
|
|
|
|
|
class HostData:
    """
    Notification state kept for one monitored host.

    Holds the host-level state (type, status, downtime flag), the state of
    each of its services, and a few helper values used when building
    messages.
    """

    def __init__(self, name):
        # Helpers: default person to address, and one counter per status.
        # The key order is kept as CRITICAL, WARNING, OK.
        self.maintainer = "Tout le monde"
        self.counts = dict(CRITICAL=0, WARNING=0, OK=0)

        # Per-service state; both dicts start empty.
        self.types = {}
        self.statuses = {}

        # Host-level state.
        self.downtime = False
        self.status = "OK"
        self.type = ""

        self.name = name
|
|
|
|
|
2022-02-08 20:25:51 +01:00
|
|
|
|
2022-02-08 15:46:34 +01:00
|
|
|
class DataStore:
    """
    In-memory record of monitoring notifications, grouped per host.

    ``knownHosts`` maps a host name to its ``HostData`` and
    ``knownMaintainers`` maps a maintainer name to
    ``{host: [raw notification lines]}``. Messages built by ``push`` are
    forwarded with ``linkedBot.push(destination, text)``.
    """

    def __init__(self, linkedBot):
        log.info("Created DataStore")

        self.knownHosts = {}
        self.knownMaintainers = {}
        self.linkedBot = linkedBot

    def push(self, msg):
        """
        Process one notification and announce the resulting change.

        ``msg`` must hold exactly seven ``|``-separated fields:
        DESTMUC|TYPE|HOST[/SERVICE]|STATE|OUTPUT|SENDER|COMMENT

        The state is recorded on the host, the raw line is appended to the
        maintainer's history, and at most one message is sent to DESTMUC
        through the linked bot.

        Raises ValueError when ``msg`` does not split into seven fields.
        """
        curtime = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        destmuc, ntype, location, status, text, sender, comment = \
            msg.split("|")

        # Only the first "/" separates host and service, so a service name
        # may itself contain "/". No "/" (or nothing after it) means the
        # notification is about the host itself.
        host, _, service = location.partition("/")

        # Raw text kept in the history
        raw = "%s [%s/%s]: %s (%s %s)" % (curtime, ntype, status, text,
                                          sender, comment)

        log.info("Datastore received: %s", msg)
        log.info("Datastore understood: %s", raw)

        cur = self._record_state(host, service, ntype, status)

        # Update history
        history = self.knownMaintainers.setdefault(cur.maintainer, {})
        history.setdefault(host, []).append(raw)

        # Number of services of this host currently in a non-OK status
        problemCount = sum(count for cstatus, count in cur.counts.items()
                           if "OK" not in cstatus)

        if "OK" not in status:
            message = self._problem_message(cur, host, service, status,
                                            text, problemCount)
        else:
            message = self._recovery_message(cur, host, service, text,
                                             problemCount)

        # None means this notification is deliberately not announced
        if message is not None:
            log.info("Sending to %s: %s", destmuc, message)
            self.linkedBot.push(destmuc, cur.maintainer + ", " + message)

        log.info("Datastore known hosts: %s", self.knownHosts)
        log.info("Datastore known maintainers: %s", self.knownMaintainers)

    def _record_state(self, host, service, ntype, status):
        """Store the notified state and return the host's HostData."""
        is_new = host not in self.knownHosts
        if is_new:
            self.knownHosts[host] = HostData(host)
        cur = self.knownHosts[host]

        if not service:
            # Host-level notification: a host seen for the first time always
            # takes the notified values, a known host only on status change.
            if is_new or cur.status != status:
                cur.status = status
                cur.type = ntype
            return cur

        previous = cur.statuses.get(service)
        if previous == status:
            # Same status as before: counters stay as they are
            return cur

        if previous is not None:
            # The service leaves its previous status
            cur.counts[previous] = cur.counts.get(previous, 0) - 1
        cur.counts[status] = cur.counts.get(status, 0) + 1
        cur.statuses[service] = status
        cur.types[service] = ntype
        return cur

    @staticmethod
    def _recap(cur, problems_only):
        """Return "<count> <status>(s)" entries joined with ", "."""
        return ", ".join(
            "%s %s(s)" % (count, cstatus)
            for cstatus, count in cur.counts.items()
            if not (problems_only and "OK" in cstatus)
        )

    def _problem_message(self, cur, host, service, status, text,
                         problemCount):
        """Build the text for a non-OK notification, or None to stay quiet."""
        # NOTE(review): cur.type is the type stored for the host itself; it
        # is not refreshed by service notifications. Confirm whether the
        # incoming notification's type was meant here.
        if "UNKNOWN" in cur.type:
            if problemCount == 0:
                return "état inconnu sur %s (%s)" % (host, text)
            # Unknown state while services already have problems: ignored
            return None

        # General problem
        if "OK" not in cur.status and problemCount == 0:
            return "je détecte un problème général (%s)" \
                   " sur %s (%s)" % (status, host, text)

        # Only one service has a problem
        if service and problemCount == 1:
            return "je détecte un problème (%s) sur le service %s de" \
                   " la machine %s" \
                   " (%s)" % (status, service, host, text)

        # Multiple problems: recap of the statuses that are not OK
        header = "je détecte de multiples problèmes " \
                 "sur la machine %s\n" % host
        return header + self._recap(cur, problems_only=True)

    def _recovery_message(self, cur, host, service, text, problemCount):
        """
        Build the text for an OK notification, or None to stay quiet.

        Also flips ``cur.downtime`` when a downtime start or end is
        announced.
        """
        # NOTE(review): same remark as in _problem_message about cur.type.
        if "DOWNTIMESTART" in cur.type and not cur.downtime:
            cur.downtime = True
            return "début de DOWNTIME" \
                   " sur %s" % host

        if "DOWNTIMEEND" in cur.type and cur.downtime:
            cur.downtime = False
            return "fin de DOWNTIME" \
                   " sur %s" % host

        # Ignore unregistered downtimes
        if "DOWNTIME" in cur.type:
            return None

        if problemCount == 0:
            if not service:
                # End of a general problem
                return "fin d'alerte générale" \
                       " sur %s (%s)" % (host, text)
            # The last failing service recovered: recap of every status
            header = "résolution du problème sur le service %s de" \
                     " la machine %s" \
                     " (%s)\n" % (service, host, text)
            return header + self._recap(cur, problems_only=False)

        # Resolution while other problems remain: recap of every status
        header = "résolution d'alertes en cours " \
                 "sur la machine %s\n" % host
        return header + self._recap(cur, problems_only=False)
|
2022-02-08 15:46:34 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|