# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see 3 or len(confobjects) == 2: sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line)) if len(confobjects) == 3: rss, rssobject, patternstring = line.split('|') if len(confobjects) == 1: rss = confobjects[0] rssobject = '' patternstring = '' # split different searched patterns patterns = [i for i in patternstring.split(self.stringsep) if i] # retrieve the content of the rss feed = feedparser.parse(rss) if 'bozo_exception' in feed: bozoexception = True logging.warning(feed['bozo_exception']) continue # check if the rss feed and the rss entry are valid ones if 'entries' in feed: if rssobject and rssobject not in feed['entries'][0].keys(): sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss)) else: sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss)) feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject}) # test if all feeds in the list were unsuccessfully retrieved and if so, leave if not feeds and bozoexception: sys.exit('No feed could be retrieved. Leaving.') ############################ # uri ############################ if not feeds and not self.clioptions.rss_uri: confoption = 'uri' if config.has_option(section, confoption): options['rss_uri'] = config.get('rss', 'uri') else: sys.exit('{confoption} parameter in the [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section, confoption=confoption)) else: options['rss_uri'] = self.clioptions.rss_uri # get the rss feed for rss parameter of [rss] section feed = feedparser.parse(options['rss_uri']) if not feed: sys.exit('Unable to parse the feed at the following url: {rss}'.format(rss=rss)) ######################################### # no_uri_pattern_no_global_pattern option ######################################### currentoption = 'no_uri_pattern_no_global_pattern' # default value options['nopatternurinoglobalpattern'] = False if config.has_option(section, currentoption): options['nopatternurinoglobalpattern'] = config.getboolean(section, currentoption) ########################### # # the cache section # ########################### section = 'cache' if not self.clioptions.cachefile: confoption = 'cachefile' if config.has_section(section): options['cachefile'] = config.get(section, confoption) else: sys.exit('You should provide a {confoption} parameter in the [{section}] section'.format(section=section, confoption=confoption)) options['cachefile'] = os.path.expanduser(options['cachefile']) cachefileparent = os.path.dirname(options['cachefile']) if cachefileparent and not os.path.exists(cachefileparent): sys.exit('The parent directory of the cache file does not exist: {cachefileparent}'.format(cachefileparent=cachefileparent)) else: options['cachefile'] = self.clioptions.cachefile ### cache limit if config.has_section(section): confoption = 'cache_limit' if config.has_option(section, confoption): try: options['cache_limit'] = int(config.get(section, confoption)) except ValueError as err: sys.exit('Error in configuration with the {confoption} parameter in [{section}]: {err}'.format(confoption=confoption, section=section, err=err)) else: options['cache_limit'] = 100 else: options['cache_limit'] = 100 ########################### # # the hashtag section # ########################### section = 'hashtaglist' if not self.clioptions.hashtaglist: confoption = 'several_words_hashtags_list' if config.has_section(section): options['hashtaglist'] = config.get(section, confoption) options['hashtaglist'] = os.path.expanduser(options['hashtaglist']) if not os.path.exists(options['hashtaglist']) or not os.path.isfile(options['hashtaglist']): sys.exit('The path to the several_words_hashtags_list parameter is not valid: {hashtaglist}'.format(hashtaglist=options['hashtaglist'])) else: options['hashtaglist'] = False ########################### # # the plugins section # ########################### plugins = {} section = 'influxdb' if config.has_section(section): ########################################## # host, port, user, pass, database options ########################################## plugins[section] = {} for currentoption in ['host','port','user','pass','database']: if config.has_option(section, currentoption): plugins[section][currentoption] = config.get(section, currentoption) if 'host' not in plugins[section]: plugins[section]['host'] = '127.0.0.1' if 'port' not in plugins[section]: plugins[section]['port'] = 8086 if 'measurement' not in plugins[section]: plugins[section]['measurement'] = 'tweets' for field in ['user','pass','database']: if field not in plugins[section]: sys.exit('Parsing error for {field} in the [{section}] section: {field} is not defined'.format(field=field, section=section)) # create the returned object with previously parsed data if feeds: self.confs.append((options, config, self.tweetformat, feeds, plugins)) else: self.confs.append((options, config, self.tweetformat, [{'feed': feed, 'patterns': [], 'rssobject': ''}], plugins)) @property def confvalues(self): '''Return the values of the different configuration files''' return self.confs