# -*- coding: utf-8 -*-

# Copyright (C) 2011, 2014, 2020 kaliko
# Copyright (C) 2020 Adrien Bourmault

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 only.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""Feeds plugin: poll syndication feeds and announce new posts in the room."""

import datetime
import threading
import time
import traceback

from urllib.error import URLError

import bs4

from feedparser import parse as feed_parse

from sid.plugin import Plugin, botcmd


# Characters that must be replaced by entities in the XHTML payload.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Produce entities within text."""
    return ''.join(html_escape_table.get(c, c) for c in text)


def strtm_to_dtm(struc_time):
    """Build a naive ``datetime`` from the first six fields of a time tuple."""
    return datetime.datetime(*struc_time[:6])


class FeedMonitor(threading.Thread):
    """Background thread polling the plugin feeds and posting new entries.

    State kept per feed URL in ``self.seen``::

        {'ids': <set of entry ids already announced>,
         'cache': <dict of etag/modified keyword arguments, or None>}

    Set ``thread_killed`` to True to make :meth:`run` return.
    """

    def __init__(self, plugin):
        """Read ``FEEDS`` and ``TEMPO`` from *plugin* and reset the state."""
        threading.Thread.__init__(self)
        self.feeds_list = plugin.FEEDS
        self.tempo = plugin.TEMPO
        self.plugin = plugin
        self.last_check = datetime.datetime.now(datetime.timezone.utc)
        self.seen = dict()
        self.thread_killed = False

    @staticmethod
    def _post_id(post):
        """Return the identifier of *post*, falling back on its link."""
        return post.get('id') or post.get('link')

    def _update_cache(self, feed, parsed):
        """Record the entry ids and HTTP cache validators of *parsed*.

        ``self.seen[feed]`` must already hold a dict.
        """
        state = self.seen[feed]
        # Always a set (even for an empty feed) so membership tests and set
        # operations keep working on the next check.
        state['ids'] = {self._post_id(p) for p in parsed.entries}
        # Common HTTP caching: keep both validators when both are provided,
        # they are passed back to feed_parse() as keyword arguments.
        cache = {key: parsed[key] for key in ('etag', 'modified')
                 if parsed.get(key)}
        if cache:
            state['cache'] = cache

    def new_posts(self, feed):
        """Send new posts in feed"""
        log = self.plugin.log
        log.debug('feed: : "%s"', feed)
        state = self.seen.get(feed)
        if state and state.get('cache'):
            parsed_feed = feed_parse(feed, **state['cache'])
        else:
            if state:
                log.debug('No cache headers set (etag/modified)')
            parsed_feed = feed_parse(feed)

        # Cannot resolve address
        if 'status' not in parsed_feed:
            log.error('Error from "%s": %r.',
                      feed, parsed_feed.get('bozo_exception'))
            return

        # http caching
        if parsed_feed.status == 304:
            log.debug('Got 304 not modified')
            return

        # unusual return http code
        if parsed_feed.status != 200:
            log.warning('Got code %d from "%s" (please update).',
                        parsed_feed.status, parsed_feed.get('href', feed))
            return

        if not state:
            # Fills with post id when first started (prevent from posting all
            # entries at startup)
            self.seen[feed] = {'cache': None}
            self._update_cache(feed, parsed_feed)
            return

        feed_title = parsed_feed.feed.get('title', 'n/a')
        text = ['[%s]' % feed_title]
        xhtml = ['%s:' % html_escape(feed_title)]

        # Detecting new post
        seen_ids = state.get('ids') or set()
        new_entries = [p for p in parsed_feed.entries
                       if self._post_id(p) not in seen_ids]
        for post in new_entries:
            post_title = post.get('title', 'n/a')
            post_link = post.get('link', '')
            log.info(post_title)

            body = '%s ' % post_title
            # Only the first element carrying the "blockquote" class of the
            # summary markup is quoted in the plain text message.
            quote = bs4.BeautifulSoup(
                post.get('summary', ''), 'html.parser'
            ).select_one('.blockquote')
            if quote is not None:
                body += "\nSummary: " + quote.get_text()
            body += '\n%s' % post_link
            # Strip any markup left in the title from the plain text body.
            text.append(bs4.BeautifulSoup(body, 'html.parser').get_text())

            xhtml.append('<a href="{link}">{title}</a>'.format(
                link=html_escape(post_link),
                title=html_escape(post_title)))

        # Updating self.seen, entries and cache headers
        self._update_cache(feed, parsed_feed)

        if len(text) > 1:
            self.plugin.send(self.plugin.bot.room,
                             {'mhtml': '<br />'.join(xhtml),
                              'mbody': '\n'.join(text)},
                             mtype='groupchat')

    def run(self):
        """Check every feed, then sleep ``tempo`` seconds, until killed."""
        while not self.thread_killed:
            self.plugin.log.debug('feeds check')
            for feed in self.feeds_list:
                try:
                    self.new_posts(feed)
                except ConnectionError as err:
                    # Non fatal exception
                    self.plugin.log.error('connection error on %s: %s',
                                          feed, err)
                except URLError as err:
                    # Non fatal exception
                    self.plugin.log.error('error for "%s": %s',
                                          feed, err.reason)
                except Exception as err:
                    # Unknown exception, killing thread anyway
                    self.plugin.log.error('feeds thread crashed: %s', err)
                    self.plugin.log.error(traceback.format_exc())
                    self.thread_killed = True
            self.last_check = datetime.datetime.now(datetime.timezone.utc)
            # Sleep one second at a time so a kill request is honoured
            # without waiting for the whole delay.
            for _ in range(self.tempo):
                time.sleep(1)
                if self.thread_killed:
                    return


class Feeds(Plugin):
    """
    .. note::
      Feeds plugin depends on external module: **feedparser**
    """
    # Number of one-second sleeps between two rounds of feed checks.
    TEMPO = 1
    # NOTE(review): this URL embeds a ``feed_token`` credential and the
    # ``!feeds`` command echoes the full URL to the room; consider moving it
    # out of the source.
    FEEDS = [
        'https://git.a-lec.org/gem-graph.atom?feed_token=_ApheVUd4zxLWP37g7-r'
    ]

    def __init__(self, bot):
        """Register the plugin on *bot* and start the monitoring thread."""
        Plugin.__init__(self, bot)
        self.last_check = None
        self.th_mon = FeedMonitor(self)
        self.th_mon.start()

    def shutdown(self):
        """Ask the monitoring thread to stop."""
        self.th_mon.thread_killed = True

    @botcmd
    def feeds(self, rcv, args):
        """Monitors project related feeds.

        * ``!feeds``      : registered feeds list
        * ``!feeds last`` : last check time"""
        if 'last' in args:
            date = '{:%Y-%m-%d %H:%M} (utc)'.format(self.th_mon.last_check)
            self.reply(rcv, f'Last feeds check: {date}')
            return
        # Link label is the URL without its scheme, whatever the scheme is.
        html = ['<a href="{0}">{1}</a>'.format(
                    html_escape(u), html_escape(u.split('://', 1)[-1]))
                for u in Feeds.FEEDS]
        msg = {'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
               'mhtml': 'Feeds:<br />' + '<br />'.join(html)}
        self.reply(rcv, msg)