Source code for superfeedr

"""Superfeedr.

* https://superfeedr.com/users/snarfed
* http://documentation.superfeedr.com/subscribers.html
* http://documentation.superfeedr.com/schema.html
"""
import logging

from flask import request
from flask.views import View
from google.cloud.ndb.key import _MAX_KEYPART_BYTES
from google.cloud.ndb._datastore_types import _MAX_STRING_LENGTH
from webutil import appengine_info
from requests.auth import HTTPBasicAuth

import models
import util

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Superfeedr credentials, loaded via util.read(). subscribe() sends them as
# HTTP Basic auth (username + token).
SUPERFEEDR_TOKEN = util.read('superfeedr_token')
SUPERFEEDR_USERNAME = util.read('superfeedr_username')
# Endpoint that subscribe() POSTs subscription requests to.
PUSH_API_URL = 'https://push.superfeedr.com'
# Maximum number of outbound links handle_feed() keeps per blog post.
MAX_BLOGPOST_LINKS = 10
# HTTP status codes, as strings.
# NOTE(review): not referenced in the visible part of this module; presumably
# consumed by callers deciding whether to retry -- confirm before changing.
TRANSIENT_ERROR_HTTP_CODES = ('500', '501', '502', '503', '429')

def subscribe(source):
    """Registers a Superfeedr subscription for a source's feed.

    The subscription response is parsed as JSON and handed straight to
    :func:`handle_feed`, so past posts returned by Superfeedr are processed
    too. Does nothing (besides logging) when running on the local server.

    http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub

    Args:
      source (Tumblr, or WordPress)

    Raises:
      whatever ``resp.raise_for_status()`` raises when Superfeedr responds
      with an HTTP error status.
    """
    if appengine_info.LOCAL_SERVER:
        logger.info('Running locally, not subscribing to Superfeedr')
        return

    topic = source.feed_url()
    callback = util.host_url(f'/{source.SHORT_NAME}/notify/{source.key_id()}')

    # Key order is kept stable because the whole dict is logged below.
    data = {
        'hub.mode': 'subscribe',
        'hub.topic': topic,
        'hub.callback': callback,
        # TODO
        # 'hub.secret': 'xxx',
        'format': 'json',
        'retrieve': 'true',
    }
    logger.info(f'Adding Superfeedr subscription: {data}')

    credentials = HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN)
    resp = util.requests_post(PUSH_API_URL, data=data, auth=credentials)
    resp.raise_for_status()

    handle_feed(resp.json(), source)
def _utf8_len(text):
    """Returns the size of ``text`` in bytes once UTF-8 encoded."""
    return len(text.encode('utf-8'))


def _truncate_utf8(text, max_bytes):
    """Returns the longest prefix of ``text`` that fits in ``max_bytes`` UTF-8 bytes."""
    encoded = text.encode('utf-8')
    if len(encoded) <= max_bytes:
        return text
    # The cut may land inside a multi-byte character; errors='ignore' drops
    # the dangling partial sequence instead of raising.
    return encoded[:max_bytes].decode('utf-8', errors='ignore')


def handle_feed(feed, source):
    """Handles a Superfeedr JSON feed.

    Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks
    for new items.

    * http://documentation.superfeedr.com/schema.html#json
    * http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

    Feeds are dropped (with a log line) when the source isn't ``enabled`` or
    doesn't have the ``webmention`` feature. For each remaining item, links
    are extracted from its content, self links are discarded, and at most
    ``MAX_BLOGPOST_LINKS`` of them are kept.

    Length limits are checked against the UTF-8 encoded size, since
    ``_MAX_STRING_LENGTH`` and ``_MAX_KEYPART_BYTES`` are byte limits; a
    non-ASCII URL can be within the limit in characters but over it in bytes.

    Args:
      feed (dict): parsed Superfeedr JSON feed
      source (Tumblr, or WordPress)
    """
    logger.info(f'Source: {source.label()} {source.key_id()}')
    logger.info(f'Raw feed: {feed}')

    if not feed:
        return

    if source.status != 'enabled':
        logger.info(f'Dropping because source is {source.status}')
        return
    elif 'webmention' not in source.features:
        logger.info("Dropping because source doesn't have webmention feature")
        return

    # `or []` tolerates an explicit null "items" value.
    for item in feed.get('items') or []:
        url = item.get('permalinkUrl') or item.get('id')
        if not url:
            logger.error('Dropping feed item without permalinkUrl or id!')
            continue

        # extract links from content, discarding self links.
        #
        # i don't use get_webmention_target[s]() here because they follow
        # redirects and fetch link contents, and this handler should be small
        # and fast and try to return a response to superfeedr successfully.
        content = item.get('content') or item.get('summary') or ''
        links = [util.clean_url(util.unwrap_t_umblr_com(link))
                 for link in util.extract_links(content)
                 if util.domain_from_link(link) not in source.domains]

        unique = []
        for link in util.dedupe_urls(links):
            if _utf8_len(link) <= _MAX_STRING_LENGTH:
                unique.append(link)
            else:
                logger.info(
                    f'Giving up on link over {_MAX_STRING_LENGTH} bytes! {link}')
            if len(unique) >= MAX_BLOGPOST_LINKS:
                logger.info(
                    f'Stopping at {MAX_BLOGPOST_LINKS} links! Skipping the rest.')
                break

        logger.info(f'Found links: {unique}')

        if _utf8_len(url) > _MAX_KEYPART_BYTES:
            # The URL can't be used as a key id as is. Store the post under a
            # truncated id with its links marked failed rather than unsent.
            logger.warning(
                f'Blog post URL is too long (over {_MAX_KEYPART_BYTES} bytes)! '
                'Giving up.')
            bp = models.BlogPost(id=_truncate_utf8(url, _MAX_KEYPART_BYTES),
                                 source=source.key, feed_item=item,
                                 failed=unique)
        else:
            bp = models.BlogPost(id=url, source=source.key, feed_item=item,
                                 unsent=unique)

        bp.get_or_save()
class Notify(View):
    """Flask view that receives Superfeedr notifications.

    Abstract; subclasses must set the :attr:`SOURCE_CLS` attr.

    http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
    """
    SOURCE_CLS = None

    def dispatch_request(self, id):
        """Looks up the source by id and processes the request's JSON feed.

        Returns an empty response body whether or not a source was found.
        """
        source = self.SOURCE_CLS.get_by_id(id)
        if not source:
            return ''

        handle_feed(request.json, source)
        return ''