From 5f4d4537654c28b7154de44140ed76e386c120f3 Mon Sep 17 00:00:00 2001 From: Carl Chenet Date: Sun, 9 Apr 2017 10:30:48 +0200 Subject: [PATCH] first commit --- AUTHORS | 2 + CHANGELOG | 3 + LICENSE | 39 +++++ README.md | 59 +++++++ docs/Makefile | 177 +++++++++++++++++++ docs/source/authors.rst | 4 + docs/source/conf.py | 261 ++++++++++++++++++++++++++++ docs/source/configure.rst | 116 +++++++++++++ docs/source/index.rst | 27 +++ docs/source/install.rst | 36 ++++ docs/source/license.rst | 4 + docs/source/plugins.rst | 38 ++++ docs/source/use.rst | 56 ++++++ feed2toot.py | 36 ++++ feed2toot/__init__.py | 15 ++ feed2toot/addtags.py | 52 ++++++ feed2toot/cliparse.py | 105 +++++++++++ feed2toot/confparse.py | 231 ++++++++++++++++++++++++ feed2toot/filterentry.py | 90 ++++++++++ feed2toot/main.py | 204 ++++++++++++++++++++++ feed2toot/plugins/__init__.py | 15 ++ feed2toot/plugins/influxdbplugin.py | 42 +++++ feed2toot/removeduplicates.py | 63 +++++++ feed2toot/tootpost.py | 51 ++++++ scripts/feed2toot | 25 +++ scripts/register_feed2toot_app | 64 +++++++ setup.py | 49 ++++++ 27 files changed, 1864 insertions(+) create mode 100644 AUTHORS create mode 100644 CHANGELOG create mode 100644 LICENSE create mode 100644 README.md create mode 100644 docs/Makefile create mode 100644 docs/source/authors.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/configure.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/install.rst create mode 100644 docs/source/license.rst create mode 100644 docs/source/plugins.rst create mode 100644 docs/source/use.rst create mode 100755 feed2toot.py create mode 100644 feed2toot/__init__.py create mode 100644 feed2toot/addtags.py create mode 100644 feed2toot/cliparse.py create mode 100644 feed2toot/confparse.py create mode 100644 feed2toot/filterentry.py create mode 100755 feed2toot/main.py create mode 100644 feed2toot/plugins/__init__.py create mode 100644 feed2toot/plugins/influxdbplugin.py create mode 
100644 feed2toot/removeduplicates.py create mode 100644 feed2toot/tootpost.py create mode 100755 scripts/feed2toot create mode 100755 scripts/register_feed2toot_app create mode 100755 setup.py diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..0ed4805 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,2 @@ +Antoine Beaupré +Carl Chenet diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..f3b639f --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,3 @@ +## [0.1] - 2017-04-09 +### Changed +- forking from feed2tweet diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6dfcd99 --- /dev/null +++ b/LICENSE @@ -0,0 +1,39 @@ +Copyright © 2017, Carl Chenet + +Copyright © 2017 Carl Chenet +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see + +The original code was under: + +MIT License + +Copyright (c) 2012, Todd Eddy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c1fe61f --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +### Feed2toot + +Feed2toot automatically parses rss feeds, identifies new posts and posts them on Mastodon. +For the full documentation, [read it online](https://feed2toot.readthedocs.org/en/latest/). + +If you would like, you can [support the development of this project on Liberapay](https://liberapay.com/carlchenet/). +Alternatively you can donate cryptocurrencies: + +- BTC: 1BcdXCcLKN9PRpp6qw23FYkYuVp59dKZix +- XMR: 4Cxwvw9V6yUehv832FWPTF7FSVuWjuBarFd17QP163uxMaFyoqwmDf1aiRtS5jWgCiRsi73yqedNJJ6V1La2joznUDzkmvBr6KKHT7Dvzj + +### Quick Install + +* Install Feed2toot from PyPI + + # pip3 install feed2toot + +* Install Feed2toot from sources + *(see the installation guide for full details) + [Installation Guide](http://feed2toot.readthedocs.org/en/latest/install.html)* + + + # tar zxvf feed2toot-0.1.tar.gz + # cd feed2toot + # python3 setup.py install + # # or + # python3 setup.py install --install-scripts=/usr/bin + +### Use Feed2toot + +* Create or modify feed2toot.ini file in order to configure feed2toot: + + [mastodon] + user_credentials=feed2toot_usercred.txt + client_credentials=feed2toot_clientcred.txt + + [cache] + cachefile=cache.db + + [rss] + uri=https://www.journalduhacker.net/rss + tweet={title} {link} + + [hashtaglist] + several_words_hashtags_list=hashtags.txt + +* Launch Feed2toot + + $ feed2toot -c /path/to/feed2toot.ini + +### Authors + +* Carl Chenet +* Antoine 
Beaupré +* First developed by Todd Eddy + +### License + +This software comes under the terms of the GPLv3+. Previously under MIT license. See the LICENSE file for the complete text of the license. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..ccc02db --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set 
PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/backupchecker.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/backupchecker.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/backupchecker" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/backupchecker" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/source/authors.rst b/docs/source/authors.rst new file mode 100644 index 0000000..68bc3db --- /dev/null +++ b/docs/source/authors.rst @@ -0,0 +1,4 @@ +Authors +======= + +Carl Chenet diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..84c7fef --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# feed2toot documentation build configuration file, created by +# sphinx-quickstart on Wed Dec 17 18:25:26 2014. 
+# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.1' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'feed2toot' +copyright = '2017, Carl Chenet ' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '1.0' +# The full version, including alpha/beta/rc tags. +release = '1.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. 
+#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. 
+#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. 
+htmlhelp_basename = 'feed2tootdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'feed2toot.tex', 'feed2toot Documentation', + 'Carl Chenet \\textless{}chaica@ohmytux.com.org\\textgreater{}', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'feed2toot', 'feed2toot Documentation', + ['Carl Chenet '], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'feed2toot', 'feed2toot Documentation', + 'Carl Chenet ', 'feed2toot', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/source/configure.rst b/docs/source/configure.rst new file mode 100644 index 0000000..53ff789 --- /dev/null +++ b/docs/source/configure.rst @@ -0,0 +1,116 @@ +Configure Feed2toot +=================== + +As a prerequisite to use Feed2toot, you need to authorize a Mastodon app for your account. + +Just use the script register_feed2toot_app to register the feed2toot app for your account.:: + + $ ./register_feed2toot_app + + This app generates Mastodon app credentials needed by Feed2toot. + feed2toot_clientcred.txt and feed2toot_usercred.txt will be written in the current dir /home/chaica/progra/python/feed2toot. + One connection is initiated to create the app. + Your password is *not* stored. + + Mastodon instance url (defaults to https://mastodon.social): + Mastodon login:chaica@ohmytux.com + Mastodon password: + The feed2toot app was added to your preferences=>authorized apps page + +As described above, two files were created. You'll need them in the feed2toot configuration. 
+ +In order to configure Feed2toot, you need to create a feed2toot.ini file (or any name you prefer, finishing with the extension .ini) with the following parameters:: + + [mastodon] + ; Here you need the two files created by register_feed2toot_app + user_credentials=/etc/feed2toot/credentials/feed2toot_usercred.txt + client_credentials=/etc/feed2toot/credentials/feed2toot_clientcred.txt + + [cache] + cachefile=/var/lib/feed2toot/feed2toot.db + cache_limit=10000 + + [rss] + uri: https://www.journalduhacker.net/rss + uri_list: /etc/feed2toot//rsslist.txt + tweet: {title} {link} + title_pattern: Open Source + title_pattern_case_sensitive: true + no_uri_pattern_no_global_pattern=true + + [hashtaglist] + several_words_hashtags_list: /etc/feed2toot/hashtags.txt + +For the [mastodon] section: + +- user_credentials: a file with the user credentials, generated by the command register_feed2toot_app +- client_credentials: a file with the client credentials, generated by the command register_feed2toot_app + +For the [cache] section: + +- cachefile: the path to the cache file storing ids of already tweeted links. Absolute path is mandatory. This file should always use the .db extension. +- cache_limit: length of the cache queue. defaults to 100. + +For the [rss] section: + +- uri: the url of the rss feed to parse +- uri_list: a path to a file with several adresses of rss feeds, one by line. Absolute path is mandatory. +- tweet: format of the tweet you want to post. It should use existing entries of the RSS fields like {title} or {link}. Launch it with this field empty to display all available entries. +- {one field of the rss feed}_pattern: takes a string representing a pattern to match for a specified field of each rss entry of the rss feed, like title_pattern or summary_pattern. +- {one field of the rss feed}_pattern_case_sensitive: either the pattern matching for the specified field should be case sensitive or not. Default to true if not specified. 
+- no_uri_pattern_no_global_pattern: don't apply global pattern (see above) when no pattern-by-uri is defined in the uri_list. Allows to get all entries of a rss in the uri_list because no pattern is defined so we match them all. Defaults to false, meaning the global patterns will be tried on every rss in the uri_list NOT HAVING specific patterns and so ONLY entries from the specific uri in the uri_list matching the global patterns will be considered. + +For the [hashtaglist] section: + +- several_words_hashtags_list: a path to the file containing hashtags in two or more words. Absolute path is mandatory. By default Feed2toot adds a # before every word of a hashtag. + +List of rss feeds +================= +Simple list of rss feeds +------------------------ +With the parameter **uri_list**, you can define a list of uri to use. Starting from 0.10, Feed2toot is now able to match specific patterns for each of the rss feeds from this list. Consider the following rss section of the configuration file:: + + [rss] + uri_list=/home/john/feed2toot/rsslist.txt + tweet={title} {link} + +Now let's have a look at the /home/john/feed2toot/rsslist.txt file:: + + https://www.journalduhacker.net/rss + https://carlchenet.com/feed + +Each line of this file is a url to a rss feed. Pretty simple. + +Match specific patterns of rss feeds in the uri_list files +---------------------------------------------------------- +You can use specific pattern matching for uri in the uri_list file to filter some of the rss entries of a rss feed. Let's modify the previous file:: + + https://www.journalduhacker.net/rss|title|hacker,psql + https://carlchenet.com/feed|title|gitlab + +Each line of this file starts with an uri, followed by a pipe (|), followed by the name of the available section to parse (see below), again followed by a pipe (|), followed by patterns, each pattern being separated from the other one by a comma (,). 
+ +In the example file above we get every rss entry from the feed available at https://www.journalduhacker.net/rss where a substring in the title section of this entry matches either "hacker" or "psql". Specific patterns are not case sensitive. For the second line, we match every rss entry from the feed available at https://carlchenet.com/feed where a substring in the title section of this entry matches "gitlab". + +Consider every entries of a rss feed from a uri in the uri_list file +-------------------------------------------------------------------- +It is possible to get all entries from a rss feed available in the uri_list file. You need an option to deactivate the global pattern matching for uri in the uri_list NOT having specific patterns:: + + [rss] + ... + no_uri_pattern_no_global_pattern=true + +In your rsslist.txt, just don't give anything else than the needed feed url to get all the entries:: + + https://www.journalduhacker.net/rss|title|hacker,psql + https://carlchenet.com/feed|title|gitlab + https://blog.linuxjobs.fr/feed.php?rss + +The last line of the file above only has the url of a rss feed. All entries from this feed will be tweeted. + +How to display available sections of the rss feed +================================================= +Feed2toot offers the **--rss-sections** command line option to display the available sections of the rss feed and exit:: + + $ feed2toot --rss-sections -c feed2toot.ini + The following sections are available in this RSS feed: ['title', 'comments', 'authors', 'link', 'author', 'summary', 'links', 'tags', 'id', 'author_detail', 'published']. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..f848453 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,27 @@ +Documentation for the Feed2toot project +======================================= + +Feed2toot parses a RSS feed, extracts the last entries and sends them to Mastodon. 
+You'll find below anything you need to install, configure or run Feed2toot. + +Guide +===== + +.. toctree:: + :maxdepth: 2 + + install + configure + use + plugins + license + authors + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/source/install.rst b/docs/source/install.rst new file mode 100644 index 0000000..f75cc9c --- /dev/null +++ b/docs/source/install.rst @@ -0,0 +1,36 @@ +How to install Feed2toot +======================== +From PyPI +^^^^^^^^^ + $ pip3 install feed2toot + +From sources +^^^^^^^^^^^^ +* You need at least Python 3.4. + +* On some Linux Distribution **setuptools** package does not come with default python install, you need to install it. + +* Install **PIP**:: + + $ wget https://bootstrap.pypa.io/get-pip.py -O - | sudo python3 + + +* Install **setuptools** module:: + + $ wget https://bootstrap.pypa.io/ez_setup.py -O - | sudo python3 + +Alternatively, Setuptools may be installed to a user-local path:: + + $ wget https://bootstrap.pypa.io/ez_setup.py -O - | python3 - --user + +* Untar the tarball and go to the source directory with the following commands:: + + $ tar zxvf feed2toot-0.1.tar.gz + $ cd feed2toot + +* Next, to install Feed2toot on your computer, type the following command with the root user:: + + $ python3 setup.py install + $ # or + $ python3 setup.py install --install-scripts=/usr/bin + diff --git a/docs/source/license.rst b/docs/source/license.rst new file mode 100644 index 0000000..82a7996 --- /dev/null +++ b/docs/source/license.rst @@ -0,0 +1,4 @@ +License +======= + +This software comes under the terms of the **GPLv3+**. It was previously under the **MIT** license. See the LICENSE file for the complete history of the license and the full text of the past and current licenses. 
diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst new file mode 100644 index 0000000..b885c89 --- /dev/null +++ b/docs/source/plugins.rst @@ -0,0 +1,38 @@ +Plugins +======= +Feed2toot supports plugins. Plugins offer optional features, not supported by default. Optional means you need a dedicated configuration and sometimes dedicated external dependencies. What you need for each module is specified below. + +InfluxDB +-------- +The InfluxDB plugin lets you store already published tweets in an InfluxDB database. + +Install the InfluxDB plugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +To install Feed2toot with the InfluxDB plugin, execute the following command. + +From scratch:: + + # pip3 install feed2toot[influxdb] + +Upgrading from a previous version, execute the following command:: + + # pip3 install feed2toot[influxdb] --upgrade + +Configuration +^^^^^^^^^^^^^ +Below is the block of configuration to add in your feed2toot.ini:: + + [influxdb] + ;host=127.0.0.1 + ;port=8086 + user=influxuser + pass=V3ryS3cr3t + database=influxdb + measurement=tweets + +- host: the host where the influxdb instance is. Defaults to 127.0.0.1 +- port: the port where the influxdb instance is listening to. Defaults to 8086 +- user: the user authorized to connect to the database. Mandatory (no default) +- pass: the password needed to connect to the database. Mandatory (no default) +- database: the name of the influxdb database to connect to. Mandatory (no default) +- measurement: the measurement to store the value into. 
Mandatory (no default) diff --git a/docs/source/use.rst b/docs/source/use.rst new file mode 100644 index 0000000..0066532 --- /dev/null +++ b/docs/source/use.rst @@ -0,0 +1,56 @@ +Use Feed2toot +============== +After the configuration of Feed2toot, just launch the following command:: + + $ feed2toot -c /path/to/feed2toot.ini + +Run Feed2toot on a regular basis +================================= +Feed2toot should be launched on a regular basis in order to efficiently send your new RSS entries to Mastodon. It is quite easy to achieve by adding a line to your user crontab, as described below:: + + @hourly feed2toot -c /path/to/feed2toot.ini + +will execute feed2toot every hour. Or without the syntactic sugar in the global crontab file /etc/crontab:: + + 0 * * * * johndoe feed2toot -c /path/to/feed2toot.ini + +Test option +=========== +In order to know what's going to be sent to Mastodon without actually doing it, use the **--dry-run** option:: + + $ feed2toot --dry-run -c /path/to/feed2toot.ini + +Debug option +============ +In order to increase the verbosity of what Feed2toot is doing, use the **--debug** option (see `the available logging levels <https://docs.python.org/3/library/logging.html>`_):: + + $ feed2toot --debug -c /path/to/feed2toot.ini + +Populate the cache file without posting tweets +============================================== +Starting from 0.8, Feed2toot offers the **--populate-cache** command line option to populate the cache file without posting to Mastodon:: + + $ feed2toot --populate-cache -c feed2toot.ini + populating RSS entry https://www.journalduhacker.net/s/65krkk + populating RSS entry https://www.journalduhacker.net/s/co2es0 + populating RSS entry https://www.journalduhacker.net/s/la2ihl + populating RSS entry https://www.journalduhacker.net/s/stfwtx + populating RSS entry https://www.journalduhacker.net/s/qq1wte + populating RSS entry https://www.journalduhacker.net/s/y8mzrp + populating RSS
entry https://www.journalduhacker.net/s/ozjqv0 + populating RSS entry https://www.journalduhacker.net/s/6ev8jz + populating RSS entry https://www.journalduhacker.net/s/gezvnv + populating RSS entry https://www.journalduhacker.net/s/lqswmz + +How to display available sections of the rss feed +================================================= +Starting from 0.8, Feed2toot offers the **--rss-sections** command line option to display the available sections of the RSS feed and exit:: + + $ feed2toot --rss-sections -c feed2toot.ini + The following sections are available in this RSS feed: ['title', 'comments', 'authors', 'link', 'author', 'summary', 'links', 'tags', 'id', 'author_detail', 'published']. + +Using syslog +============ +Feed2toot is able to send its logs to syslog. You can use it with the following command:: + + $ feed2toot --syslog=WARN -c /path/to/feed2toot.ini diff --git a/feed2toot.py b/feed2toot.py new file mode 100755 index 0000000..d7ce8ba --- /dev/null +++ b/feed2toot.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# vim:ts=4:sw=4:ft=python:fileencoding=utf-8 +# Copyright © 2015-2017 Carl Chenet +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.
# Launch Feed2toot
'''Launch Feed2toot'''

import sys
from feed2toot.main import Main

class Feed2Toot(object):
    '''Launcher object: instantiating it starts the Feed2toot application.'''

    def __init__(self):
        '''Start the application immediately by delegating to main().'''
        self.main()

    def main(self):
        '''Instantiate feed2toot.main.Main, whose constructor runs the program.'''
        Main()

# NOTE: when executed as a script, Main is instantiated directly; the
# Feed2Toot wrapper above is not used on this path.
if __name__ == '__main__':
    Main()
    sys.exit(0)
class AddTags(object):
    '''Append as many hashtags as fit within the maximum message length.

    Tags are tried longest first. A tag that does not fit is skipped, and
    shorter tags after it are still tried. Each appended tag costs its own
    length plus one separating space.
    '''
    def __init__(self, tweet, tags, maxlength=500):
        '''Store the inputs and compute the final text right away.

        tweet     -- the text the tags are appended to
        tags      -- iterable of tag strings (e.g. '#python')
        maxlength -- maximum allowed length of the final text (default 500)
        '''
        self.tags = tags
        self.tweet = tweet
        self.maxlength = maxlength
        self.main()

    def main(self):
        '''Append the tags that fit, longest first, to self.tweet.'''
        tweetlength = len(self.tweet)

        # Longest tags first. sorted() is stable, so tags of equal length keep
        # their original relative order. The result is a real list, so
        # self.tags stays usable after this method has run.
        self.tags = sorted(self.tags, key=len, reverse=True)

        # add each tag only if there is room for it plus the joining space
        for tag in self.tags:
            needed = len(tag) + 1
            if tweetlength + needed <= self.maxlength:
                self.tweet = ' '.join([self.tweet, tag])
                tweetlength += needed

    @property
    def finaltweet(self):
        '''Return the text with as many tags as possible appended.'''
        return self.tweet
__version__ = '0.1'

class CliParse(object):
    '''Parse the feed2toot command line and resolve the configuration files.

    After construction, the parsed options are available through the
    ``options`` property. ``options.configs`` holds the list of configuration
    files found for every path given with -c/--config.
    '''
    def __init__(self, args=None):
        '''Parse the command line immediately.

        args -- optional list of argument strings; when None (the default)
                argparse reads sys.argv[1:], as before.
        '''
        self.args = args
        self.main()

    def main(self):
        '''Build the argument parser, parse, and collect configuration files.

        Exits through sys.exit() when a given configuration path does not
        exist or when no configuration file could be found at all.
        '''
        feed2tootepilog = 'For more information: https://feed2toot.readthedocs.org'
        feed2tootdescription = 'Take rss feed and send it to Mastodon'
        parser = ArgumentParser(prog='feed2toot',
                                description=feed2tootdescription,
                                epilog=feed2tootepilog)
        parser.add_argument('--version', action='version', version=__version__)
        parser.add_argument('-c', '--config',
                            default=[os.path.join(os.getenv('XDG_CONFIG_HOME', '~/.config'),
                                                  'feed2toot.ini')],
                            nargs='+',
                            dest="config",
                            help='Location of config file (default: %(default)s)',
                            metavar='FILE')
        parser.add_argument('-a', '--all', action='store_true', default=False,
                            dest='all',
                            help='tweet all RSS items, regardless of cache')
        parser.add_argument('-l', '--limit', dest='limit', default=10, type=int,
                            help='tweet only LIMIT items (default: %(default)s)')
        parser.add_argument('--cachefile', dest='cachefile',
                            help='location of the cache file (default: %(default)s)')
        parser.add_argument('-n', '--dry-run', dest='dryrun',
                            action='store_true', default=False,
                            help='Do not actually post tweets')
        parser.add_argument('-v', '--verbose', '--info', dest='log_level',
                            action='store_const', const='info', default='warning',
                            help='enable informative (verbose) output, work on log level INFO')
        parser.add_argument('-d', '--debug', dest='log_level',
                            action='store_const', const='debug', default='warning',
                            help='enable debug output, work on log level DEBUG')
        # every level name known to the logging module except NOTSET
        levels = [name for name in logging._nameToLevel
                  if isinstance(name, str) and name != 'NOTSET']
        parser.add_argument('--syslog', nargs='?', default=None,
                            type=str.upper, action='store',
                            const='INFO', choices=levels,
                            help='log to syslog facility, default: no '
                                 'logging, INFO if --syslog is specified without '
                                 'argument')
        parser.add_argument('--hashtaglist', dest='hashtaglist',
                            help='a list of hashtag to match')
        parser.add_argument('-p', '--populate-cache', action='store_true', default=False,
                            dest='populate',
                            help='populate RSS entries in cache without actually posting them to Mastodon')
        parser.add_argument('-r', '--rss', help='the RSS feed URL to fetch items from',
                            dest='rss_uri', metavar='http://...')
        parser.add_argument('--rss-sections', action='store_true', default=False,
                            dest='rsssections',
                            help='print the available sections of the rss feed to be used in the tweet template')
        self.opts = parser.parse_args(self.args)
        # expand the path to the cache file if defined
        if self.opts.cachefile:
            self.opts.cachefile = os.path.expanduser(self.opts.cachefile)
        # get the different config files, from a directory or from a *.ini style
        self.opts.config = [os.path.expanduser(path) for path in self.opts.config]
        # Accumulate the matches of every -c value. Assigning inside the loop
        # would keep only the files found for the last path.
        configs = []
        for element in self.opts.config:
            if element and not os.path.exists(element):
                sys.exit('You should provide an existing path for the config file: %s' % element)
            if os.path.isdir(element):
                configs.extend(sorted(glob.glob(os.path.join(element, '*.ini'))))
            else:
                # trying to glob the path
                configs.extend(glob.glob(element))
        self.opts.configs = configs
        # verify if a configuration file is provided
        if not self.opts.configs:
            sys.exit('no configuration file was found at the specified path(s) with the option -c')

    @property
    def options(self):
        '''Return the parsed command line options (an argparse namespace).'''
        return self.opts
+# Copyright © 2015-2017 Carl Chenet +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see 3 or len(confobjects) == 2: + sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line)) + if len(confobjects) == 3: + rss, rssobject, patternstring = line.split('|') + if len(confobjects) == 1: + rss = confobjects[0] + rssobject = '' + patternstring = '' + # split different searched patterns + patterns = [i for i in patternstring.split(self.stringsep) if i] + # retrieve the content of the rss + feed = feedparser.parse(rss) + if 'bozo_exception' in feed: + bozoexception = True + logging.warning(feed['bozo_exception']) + continue + # check if the rss feed and the rss entry are valid ones + if 'entries' in feed: + if rssobject and rssobject not in feed['entries'][0].keys(): + sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss)) + else: + sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss)) + feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject}) + # test if all feeds in the list were unsuccessfully retrieved and if so, leave + if not feeds and bozoexception: + sys.exit('No feed could be retrieved. 
Leaving.') + ############################ + # uri + ############################ + if not feeds and not self.clioptions.rss_uri: + confoption = 'uri' + if config.has_option(section, confoption): + options['rss_uri'] = config.get('rss', 'uri') + else: + sys.exit('{confoption} parameter in the [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section, confoption=confoption)) + else: + options['rss_uri'] = self.clioptions.rss_uri + # get the rss feed for rss parameter of [rss] section + feed = feedparser.parse(options['rss_uri']) + if not feed: + sys.exit('Unable to parse the feed at the following url: {rss}'.format(rss=rss)) + + ######################################### + # no_uri_pattern_no_global_pattern option + ######################################### + currentoption = 'no_uri_pattern_no_global_pattern' + # default value + options['nopatternurinoglobalpattern'] = False + if config.has_option(section, currentoption): + options['nopatternurinoglobalpattern'] = config.getboolean(section, currentoption) + ########################### + # + # the cache section + # + ########################### + section = 'cache' + if not self.clioptions.cachefile: + confoption = 'cachefile' + if config.has_section(section): + options['cachefile'] = config.get(section, confoption) + else: + sys.exit('You should provide a {confoption} parameter in the [{section}] section'.format(section=section, confoption=confoption)) + options['cachefile'] = os.path.expanduser(options['cachefile']) + cachefileparent = os.path.dirname(options['cachefile']) + if cachefileparent and not os.path.exists(cachefileparent): + sys.exit('The parent directory of the cache file does not exist: {cachefileparent}'.format(cachefileparent=cachefileparent)) + else: + options['cachefile'] = self.clioptions.cachefile + ### cache limit + if config.has_section(section): + confoption = 'cache_limit' + if config.has_option(section, confoption): + try: + options['cache_limit'] = 
int(config.get(section, confoption)) + except ValueError as err: + sys.exit('Error in configuration with the {confoption} parameter in [{section}]: {err}'.format(confoption=confoption, section=section, err=err)) + else: + options['cache_limit'] = 100 + else: + options['cache_limit'] = 100 + ########################### + # + # the hashtag section + # + ########################### + section = 'hashtaglist' + if not self.clioptions.hashtaglist: + confoption = 'several_words_hashtags_list' + if config.has_section(section): + options['hashtaglist'] = config.get(section, confoption) + options['hashtaglist'] = os.path.expanduser(options['hashtaglist']) + if not os.path.exists(options['hashtaglist']) or not os.path.isfile(options['hashtaglist']): + sys.exit('The path to the several_words_hashtags_list parameter is not valid: {hashtaglist}'.format(hashtaglist=options['hashtaglist'])) + else: + options['hashtaglist'] = False + ########################### + # + # the plugins section + # + ########################### + plugins = {} + section = 'influxdb' + if config.has_section(section): + ########################################## + # host, port, user, pass, database options + ########################################## + plugins[section] = {} + for currentoption in ['host','port','user','pass','database']: + if config.has_option(section, currentoption): + plugins[section][currentoption] = config.get(section, currentoption) + if 'host' not in plugins[section]: + plugins[section]['host'] = '127.0.0.1' + if 'port' not in plugins[section]: + plugins[section]['port'] = 8086 + if 'measurement' not in plugins[section]: + plugins[section]['measurement'] = 'tweets' + for field in ['user','pass','database']: + if field not in plugins[section]: + sys.exit('Parsing error for {field} in the [{section}] section: {field} is not defined'.format(field=field, section=section)) + + # create the returned object with previously parsed data + if feeds: + self.confs.append((options, config, 
self.tweetformat, feeds, plugins)) + else: + self.confs.append((options, config, self.tweetformat, [{'feed': feed, 'patterns': [], 'rssobject': ''}], plugins)) + + @property + def confvalues(self): + '''Return the values of the different configuration files''' + return self.confs diff --git a/feed2toot/filterentry.py b/feed2toot/filterentry.py new file mode 100644 index 0000000..bf27ffe --- /dev/null +++ b/feed2toot/filterentry.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# Copyright © 2015-2017 Carl Chenet +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
class Main(object):
    '''Main class of Feed2toot.

    Instantiating the class runs the program: parse the command line, read
    every configuration file, then for each feed build the message of each
    new entry and post it (or only log/cache it, depending on the options).
    '''

    def __init__(self):
        '''Run the application immediately.'''
        self.main()

    def setup_logging(self, options):
        '''Configure the root logger from the command line options.

        options -- parsed options; reads ``syslog`` (a level name or a false
                   value) and ``log_level`` (a level name for the console).

        Raises ValueError when the syslog level name is unknown to logging.
        '''
        if options.syslog:
            sl = logging.handlers.SysLogHandler(address='/dev/log')
            sl.setFormatter(logging.Formatter('feed2toot[%(process)d]: %(message)s'))
            # convert syslog argument to a numeric value
            loglevel = getattr(logging, options.syslog.upper(), None)
            if not isinstance(loglevel, int):
                # report the value the user gave, not the failed lookup result
                raise ValueError('Invalid log level: %s' % options.syslog)
            sl.setLevel(loglevel)
            logging.getLogger('').addHandler(sl)
            logging.debug('configured syslog level %s' % loglevel)
        # the root logger lets everything through; each handler filters
        logging.getLogger('').setLevel(logging.DEBUG)
        sh = logging.StreamHandler()
        sh.setLevel(options.log_level.upper())
        logging.getLogger('').addHandler(sh)
        logging.debug('configured stdout level %s' % sh.level)

    @staticmethod
    def _load_hashtag_list(path):
        '''Return the lines of the multi-word hashtag file, newlines stripped.'''
        with codecs.open(path, encoding='utf-8') as hashtagfile:
            return [line.rstrip('\n') for line in hashtagfile]

    @staticmethod
    def _build_hashtags(tags, severalwordshashtags):
        '''Turn the tags of one feed entry into a list of '#hashtag' strings.'''
        hashtags = []
        # Stays True for the remaining tags of the entry once a multi-word
        # hashtag matched, as in the original flow.
        severalwordsinhashtag = False
        for tag in tags:
            term = tag['term']
            tmphashtags = term
            for element in severalwordshashtags:
                if element in term:
                    severalwordsinhashtag = True
                    tmphashtags = term.replace(element, ''.join(element.split()))
            if severalwordsinhashtag:
                # remove characters stopping a word from being a hashtag
                for char in ("'", '-', '.', ' '):
                    tmphashtags = tmphashtags.replace(char, '')
                hashtags.append('#{}'.format(tmphashtags))
            else:
                # remove space from hashtag
                hashtags.append('#{}'.format(term.replace(' ', '')))
        return hashtags

    @staticmethod
    def _format_elements(tweetformat):
        '''Return the names of the '{name}' placeholders of the format string.'''
        return [word.strip('{}') for word in tweetformat.split(' ')
                if word.startswith('{') and word.endswith('}')]

    @staticmethod
    def _run_plugins(plugins, finaltweet):
        '''Import and call every configured plugin with the final message.'''
        for plugin in plugins:
            pluginclassname = '{plugin}Plugin'.format(plugin=plugin.title())
            pluginmodulename = 'feed2toot.plugins.{pluginmodule}'.format(pluginmodule=pluginclassname.lower())
            try:
                pluginmodule = importlib.import_module(pluginmodulename)
                pluginclass = getattr(pluginmodule, pluginclassname)
                pluginclass(plugins[plugin], finaltweet)
            except ImportError as err:
                # a missing optional plugin must not abort the run
                print(err)

    def main(self):
        """The main function."""
        clip = CliParse()
        clioptions = clip.options
        self.setup_logging(clioptions)
        # iterating over the different configuration files
        cfgp = ConfParse(clioptions)
        for conf in cfgp.confvalues:
            options, config, tweetformat, feeds, plugins = conf[0:5]
            # open the persistent list
            # NOTE(review): the last three characters of the configured path
            # are dropped here -- presumably an extension that the storage
            # backend adds back itself; confirm against PersistentList.
            cache = PersistentList(options['cachefile'][0:-3], options['cache_limit'])
            try:
                # Always bound, so entries carrying tags no longer raise
                # NameError when no hashtag list is configured.
                severalwordshashtags = []
                if options.get('hashtaglist'):
                    severalwordshashtags = self._load_hashtag_list(options['hashtaglist'])
                for feed in feeds:
                    # reverse feed entries because most recent one should be sent as the last one in Mastodon
                    entries = feed['feed']['entries'][0:clioptions.limit]
                    entries.reverse()
                    # --rss-sections option: print rss sections and exit
                    if clioptions.rsssections:
                        if entries:
                            print('The following sections are available in this RSS feed: {}'.format([j for j in entries[0]]))
                            sys.exit(0)
                        else:
                            sys.exit('Could not parse the section of the rss feed')
                    # keep only the entries whose id is not in the cache yet
                    if not clioptions.all:
                        totweet = [i for i in entries if 'id' in i and i['id'] not in cache]
                    else:
                        totweet = entries

                    for entry in totweet:
                        if 'id' not in entry:
                            # malformed feed entry, skip
                            continue
                        logging.debug('found feed entry %s, %s', entry['id'], entry['title'])

                        rss = {
                            'id': entry['id'],
                        }
                        # lets see if the rss feed has hashtag
                        if 'tags' in entry:
                            rss['hashtags'] = self._build_hashtags(entry['tags'], severalwordshashtags)

                        # match elements of the tweet format string with available element in the RSS feed
                        elements = self._format_elements(tweetformat)
                        fe = FilterEntry(elements, entry, options, feed['patterns'], feed['rssobject'])
                        entrytosend = fe.finalentry
                        # NOTE(review): the collapsed source does not show
                        # whether the dry-run/send section was nested under
                        # "if entrytosend:". Entries rejected by the filter
                        # are only logged here and never cached -- confirm.
                        if not entrytosend:
                            logging.debug('This rss entry did not meet pattern criteria. Should have not been sent')
                            continue

                        tweetwithnotag = tweetformat.format(**entrytosend)
                        # remove duplicates from the final tweet
                        dedup = RemoveDuplicates(tweetwithnotag)
                        # only append hashtags if they exist
                        # remove last tags if tweet too long
                        if 'hashtags' in rss:
                            addtag = AddTags(dedup.finaltweet, rss['hashtags'])
                            finaltweet = addtag.finaltweet
                        else:
                            finaltweet = dedup.finaltweet

                        if clioptions.dryrun:
                            logging.warning('Tweet should have been sent: {tweet}'.format(tweet=finaltweet))
                            continue

                        storeit = True
                        if not clioptions.populate:
                            logging.debug('sending the following tweet:{tweet}'.format(tweet=finaltweet))
                            twp = TootPost(config, finaltweet)
                            storeit = twp.storeit()
                        else:
                            logging.debug('populating RSS entry {}'.format(rss['id']))
                        # in both cases we store the id of the sent tweet
                        if storeit:
                            cache.append(rss['id'])
                        # plugins
                        if plugins:
                            self._run_plugins(plugins, finaltweet)
            finally:
                # do not forget to close cache (shelf object), including on
                # sys.exit() and on errors raised while posting
                cache.close()
If not, see diff --git a/feed2toot/plugins/influxdbplugin.py b/feed2toot/plugins/influxdbplugin.py new file mode 100644 index 0000000..ccb8b5a --- /dev/null +++ b/feed2toot/plugins/influxdbplugin.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright © 2017 Carl Chenet +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
class RemoveDuplicates(object):
    '''Remove repeated links from the final string before it is sent.

    Only whitespace-delimited words starting with http:// or https:// are
    considered. The first occurrence of each link is kept and later exact
    repetitions are dropped; every other word is left untouched.
    '''
    def __init__(self, tweet):
        '''Store the text and deduplicate its links right away.'''
        self.tweet = tweet
        self.main()

    def main(self):
        '''Drop every repeated link from self.tweet.'''
        import re

        seenlinks = set()
        kept = []
        # The capturing group keeps the whitespace separators in the result,
        # so the text can be rebuilt without changing the spacing or line
        # breaks between the surviving words.
        for piece in re.split(r'(\s+)', self.tweet):
            if piece.startswith(('http://', 'https://')):
                if piece in seenlinks:
                    # Whole words are compared, so a longer link sharing a
                    # prefix with the duplicate is never altered. The
                    # separator that preceded the duplicate goes with it.
                    if kept:
                        kept.pop()
                    continue
                seenlinks.add(piece)
            kept.append(piece)
        # remove all 2xspaces
        self.tweet = ''.join(kept).replace('  ', ' ')

    @property
    def finaltweet(self):
        '''Return the text after the duplicated links were removed.'''
        return self.tweet
class TootPost:
    '''Post one message to Mastodon with the credentials of the configuration.'''

    def __init__(self, config, toot):
        '''Store the inputs and post the message right away.

        config -- configuration object queried with has_option()/get() on the
                  [mastodon] section (a configparser-style parser)
        toot   -- the text to post
        '''
        self.config = config
        self.store = True
        self.toot = toot
        self.main()

    def main(self):
        '''Create the Mastodon client and post the message.

        Reads ``client_credentials`` and ``user_credentials`` from the
        [mastodon] section. When the optional ``instance_url`` option is
        present it is passed as the API base URL, so that credentials
        registered on another instance are used against that instance.
        Without it the library default applies, as before.
        '''
        clientargs = {
            'client_id': self.config.get('mastodon', 'client_credentials'),
            'access_token': self.config.get('mastodon', 'user_credentials'),
        }
        if self.config.has_option('mastodon', 'instance_url'):
            clientargs['api_base_url'] = self.config.get('mastodon', 'instance_url')
        mastodon = Mastodon(**clientargs)
        mastodon.toot(self.toot)

    def storeit(self):
        '''Indicate if the id of the posted entry should be stored or not.'''
        return self.store
# Feed2toot startup
'''Feed2toot startup'''

import sys
from feed2toot.main import Main

# Command line entry point: instantiating Main starts the program, then the
# script exits with status 0.
if __name__ == '__main__':
    Main()
    sys.exit(0)
'''Register the feed2toot app on a Mastodon instance and save its credentials'''

from getpass import getpass
from os import getcwd
from mastodon import Mastodon
from mastodon.Mastodon import MastodonIllegalArgumentError
import sys

DEFAULT_INSTANCE = 'https://mastodon.social'


def _ask_instance():
    '''Prompt for the Mastodon instance and return its URL

    An empty answer selects DEFAULT_INSTANCE. An answer that does not start
    with "http" gets "https://" prepended. Surrounding whitespace is dropped
    because it can never be part of a valid URL.
    '''
    instance = input('Mastodon instance url (defaults to https://mastodon.social):').strip()
    if not instance:
        return DEFAULT_INSTANCE
    if not instance.startswith('http'):
        return ''.join(['https://', instance])
    return instance


def _ask_login():
    '''Prompt until a plausible login (an email address) is entered, then return it'''
    while True:
        user = input('Mastodon login:').strip()
        if not user:
            print('Your Mastodon username can not be empty')
        elif '@' not in user or '.' not in user:
            print('Your Mastodon username should be an email')
        else:
            return user


def main():
    '''Interactively create the app and write both credential files to the current dir

    Exits with an error message when Mastodon.py rejects the login.
    '''
    cwd = getcwd()
    clientcred = '{cwd}/feed2toot_clientcred.txt'.format(cwd=cwd)
    usercred = '{cwd}/feed2toot_usercred.txt'.format(cwd=cwd)

    print(
        '\nThis app generates Mastodon app credentials needed by Feed2toot.\n'
        'feed2toot_clientcred.txt and feed2toot_usercred.txt will be written in the current dir {cwd}.\n'
        'One connection is initiated to create the app.\n'
        'Your password is *not* stored.\n'.format(cwd=cwd)
    )

    instance = _ask_instance()
    user = _ask_login()
    password = getpass(prompt='Mastodon password:')

    # The app has to be registered on the instance the user picked: client
    # credentials issued by one instance are not accepted by another, so
    # omitting api_base_url here broke every non-default instance.
    Mastodon.create_app(
        'feed2toot',
        api_base_url=instance,
        to_file=clientcred
    )
    mastodon = Mastodon(client_id=clientcred, api_base_url=instance)
    try:
        mastodon.log_in(user, password, to_file=usercred)
    except MastodonIllegalArgumentError as err:
        print(err)
        sys.exit('\nMy guess is bad login/password\n')
    print('The feed2toot app was added to your preferences=>authorized apps page')


if __name__ == '__main__':
    main()
    sys.exit(0)
'''Setup for Feed2toot'''

from setuptools import setup, find_packages

# Trove classifiers published with the package.
CLASSIFIERS = [
    'Intended Audience :: End Users/Desktop',
    'Environment :: Console',
    'License :: OSI Approved :: GNU General Public License (GPL)',
    'Operating System :: POSIX :: Linux',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
]

# The project page doubles as the download location.
PROJECT_URL = 'https://github.com/chaica/feed2toot'

# Executables installed alongside the package.
SCRIPTS = ['scripts/feed2toot', 'scripts/register_feed2toot_app']

# Mandatory runtime dependencies; the InfluxDB plugin is an optional extra.
REQUIREMENTS = ['feedparser', 'persistentlist>=0.4', 'Mastodon.py']
EXTRAS = {'influxdb': ["influxdb"]}

METADATA = dict(
    name='feed2toot',
    version='0.1',
    license='GNU GPL v3',
    description='Parse rss feed and tweet new posts to Mastodon',
    long_description='Parse rss feed and tweet new posts to the Mastodon social network',
    author='Carl Chenet',
    author_email='chaica@ohmytux.com',
    url=PROJECT_URL,
    classifiers=CLASSIFIERS,
    download_url=PROJECT_URL,
    packages=find_packages(),
    scripts=SCRIPTS,
    install_requires=REQUIREMENTS,
    extras_require=EXTRAS,
)

setup(**METADATA)