"""Superfeedr.
* https://superfeedr.com/users/snarfed
* http://documentation.superfeedr.com/subscribers.html
* http://documentation.superfeedr.com/schema.html
"""
import logging
from flask import request
from flask.views import View
from google.cloud.ndb.key import _MAX_KEYPART_BYTES
from google.cloud.ndb._datastore_types import _MAX_STRING_LENGTH
from webutil import appengine_info
from requests.auth import HTTPBasicAuth
import models
import util
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Superfeedr credentials, loaded with util.read() when this module is imported
# and used for HTTP Basic auth in subscribe().
# NOTE(review): presumably util.read() reads a local secrets file - confirm.
SUPERFEEDR_TOKEN = util.read('superfeedr_token')
SUPERFEEDR_USERNAME = util.read('superfeedr_username')
# Endpoint that subscribe() POSTs subscription requests to.
PUSH_API_URL = 'https://push.superfeedr.com'
# Maximum number of outbound links handle_feed() keeps per feed item.
MAX_BLOGPOST_LINKS = 10
# HTTP status codes, as strings. Going by the name, these are treated as
# transient errors; not referenced in the visible part of this file.
# NOTE(review): confirm where and how this is used.
TRANSIENT_ERROR_HTTP_CODES = ('500', '501', '502', '503', '429')
[docs]
def subscribe(source):
    """Registers a Superfeedr subscription for a source's feed.

    POSTs a PubSubHubbub ``subscribe`` request to Superfeedr with
    ``retrieve=true`` and hands the JSON body of the response to
    :func:`handle_feed`, so any posts returned with the subscription are
    processed right away.

    Does nothing (other than logging) when running on the local dev server.

    http://documentation.superfeedr.com/subscribers.html#addingfeedswithpubsubhubbub

    Args:
      source (Tumblr, or WordPress)

    Raises:
      whatever ``raise_for_status()`` raises when Superfeedr returns an HTTP
      error status
    """
    if appengine_info.LOCAL_SERVER:
        logger.info('Running locally, not subscribing to Superfeedr')
        return

    topic = source.feed_url()
    callback = util.host_url(f'/{source.SHORT_NAME}/notify/{source.key_id()}')
    params = {
        'hub.mode': 'subscribe',
        'hub.topic': topic,
        'hub.callback': callback,
        # TODO
        # 'hub.secret': 'xxx',
        'format': 'json',
        'retrieve': 'true',
    }
    logger.info(f'Adding Superfeedr subscription: {params}')

    credentials = HTTPBasicAuth(SUPERFEEDR_USERNAME, SUPERFEEDR_TOKEN)
    response = util.requests_post(PUSH_API_URL, data=params, auth=credentials)
    response.raise_for_status()

    # the response body is a feed in the same JSON format as notifications
    handle_feed(response.json(), source)
[docs]
def handle_feed(feed, source):
    """Handles a Superfeedr JSON feed.

    Creates a :class:`models.BlogPost` entity for each feed item, populated
    with the outbound (non-self) links found in the item's content, and calls
    ``get_or_save()`` on it. (Adding propagate-blogpost tasks for new items is
    expected to happen inside ``get_or_save()``; that code is not in this
    module.)

    The feed is dropped without processing if it's empty, if the source isn't
    enabled, or if the source doesn't have the ``webmention`` feature.

    * http://documentation.superfeedr.com/schema.html#json
    * http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

    Args:
      feed (dict): parsed Superfeedr JSON feed; may be empty or None
      source (Tumblr, or WordPress)
    """
    logger.info(f'Source: {source.label()} {source.key_id()}')
    logger.info(f'Raw feed: {feed}')

    if not feed:
        return

    if source.status != 'enabled':
        logger.info(f'Dropping because source is {source.status}')
        return
    elif 'webmention' not in source.features:
        logger.info("Dropping because source doesn't have webmention feature")
        return

    # 'items' may be absent or an explicit JSON null; treat both as no items.
    for item in feed.get('items') or []:
        url = item.get('permalinkUrl') or item.get('id')
        if not url:
            logger.error('Dropping feed item without permalinkUrl or id!')
            continue

        # extract links from content, discarding self links.
        #
        # i don't use get_webmention_target[s]() here because they follow
        # redirects and fetch link contents, and this handler should be small
        # and fast and try to return a response to superfeedr successfully.
        #
        # fall back to '' even when 'summary' is present but null, so that
        # extract_links() always gets a string.
        content = item.get('content') or item.get('summary') or ''
        links = [util.clean_url(util.unwrap_t_umblr_com(link))
                 for link in util.extract_links(content)
                 if util.domain_from_link(link) not in source.domains]

        unique = []
        for link in util.dedupe_urls(links):
            if len(link) <= _MAX_STRING_LENGTH:
                unique.append(link)
            else:
                logger.info(f'Giving up on link over {_MAX_STRING_LENGTH} chars! {link}')
            if len(unique) >= MAX_BLOGPOST_LINKS:
                logger.info(f'Stopping at {MAX_BLOGPOST_LINKS} links! Skipping the rest.')
                break

        logger.info(f'Found links: {unique}')

        # the key id limit is in bytes, so measure (and truncate) the UTF-8
        # encoding rather than the character count. for ASCII URLs the two are
        # identical.
        url_bytes = url.encode('utf-8')
        if len(url_bytes) > _MAX_KEYPART_BYTES:
            logger.warning(
                f'Blog post URL is too long (over {_MAX_KEYPART_BYTES} bytes)! Giving up.')
            # errors='ignore' drops a multi-byte character cut in half by the
            # truncation instead of raising.
            key_id = url_bytes[:_MAX_KEYPART_BYTES].decode('utf-8', errors='ignore')
            # store the links as failed so that they aren't attempted.
            bp = models.BlogPost(id=key_id, source=source.key,
                                 feed_item=item, failed=unique)
        else:
            bp = models.BlogPost(id=url, source=source.key, feed_item=item,
                                 unsent=unique)

        bp.get_or_save()
[docs]
class Notify(View):
    """Flask view that receives a Superfeedr notification.

    Abstract; subclasses must set the :attr:`SOURCE_CLS` attr to the source
    class whose entities are looked up by id.

    http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications
    """
    # Source class used to look up the notified entity; set by subclasses.
    SOURCE_CLS = None

    def dispatch_request(self, id):
        """Passes the request's JSON body to :func:`handle_feed`.

        Args:
          id: id of the :attr:`SOURCE_CLS` entity this notification is for

        Returns:
          str: always the empty string, whether or not the source was found
        """
        found = self.SOURCE_CLS.get_by_id(id)
        if not found:
            # unknown source: nothing to do, but still respond with an empty body
            return ''

        handle_feed(request.json, found)
        return ''