From eaefc2c639bb7bdd4ee160b9b2767335df054386 Mon Sep 17 00:00:00 2001 From: Carl Chenet Date: Tue, 1 Aug 2017 23:12:33 +0200 Subject: [PATCH] add conf parsing for urilist option of rss section --- feed2toot/confparsers/rss/urilist.py | 82 ++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 feed2toot/confparsers/rss/urilist.py diff --git a/feed2toot/confparsers/rss/urilist.py b/feed2toot/confparsers/rss/urilist.py new file mode 100644 index 0000000..6b71c51 --- /dev/null +++ b/feed2toot/confparsers/rss/urilist.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Copyright © 2015-2017 Carl Chenet +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see ', line) + if not matches: + sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line)) + feedname, line = matches.groups() + confobjects = line.split('|') + if len(confobjects) > 3 or len(confobjects) == 2: + sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line)) + if len(confobjects) == 3: + rss, rssobject, patternstring = line.split('|') + if len(confobjects) == 1: + rss = confobjects[0] + rssobject = '' + patternstring = '' + # split different searched patterns + patterns = [i for i in patternstring.split(stringsep) if i] + # retrieve the content of the rss + feed = feedparser.parse(rss) + if 'bozo_exception' in feed: + bozoexception = True + logging.warning(feed['bozo_exception']) + if not accept_bozo_exceptions: + continue + # check if the rss feed and the rss entry are valid ones + if 'entries' in feed: + if rssobject and rssobject not in feed['entries'][0].keys(): + sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss)) + else: + sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss)) + feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject, 'feedname': feedname}) + # test if all feeds in the list were unsuccessfully retrieved and if so, leave + if not feeds and bozoexception: + sys.exit('No feed could be retrieved. Leaving.') + return feeds