Add OPML support;

Set a new default node (Thank you roughnecks);
Improve CSS, JS, XSLT;
Stop relying on external libraries to produce syndication feeds.
This commit is contained in:
Schimon Jehudah, Adv. 2024-07-12 15:39:17 +03:00
parent e07ff6e838
commit d1f1edbaca
8 changed files with 519 additions and 185 deletions

View file

@ -2,10 +2,10 @@
# -*- coding: utf-8 -*-
import datetime
from dateutil import parser
from fastapi import FastAPI, Request, Response
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
import feedgenerator
import json
from slixmpp import ClientXMPP
from slixmpp.exceptions import IqError, IqTimeout
@ -57,12 +57,11 @@ async def view_pubsub(request: Request):
if pubsub and node and item_id:
iq = await get_node_item(pubsub, node, item_id)
if iq:
link = 'xmpp:{pubsub}?;node={node};item={item}'.format(
pubsub=pubsub, node=node, item=item_id)
xml_atom = generate_rfc_4287(iq, link)
link = form_an_item_link(pubsub, node, item_id)
xml_atom = generate_atom(iq, link)
iq = await get_node_items(pubsub, node)
if iq:
generate_json(iq, node)
generate_json(iq)
else:
operator = get_configuration('settings')['operator']
json_data = [{'title' : 'Error retrieving items list.',
@ -79,7 +78,8 @@ async def view_pubsub(request: Request):
else:
text = 'Please check that PubSub node and item are valid and accessible.'
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
# try:
# iq = await get_node_items(pubsub, node)
@ -94,32 +94,37 @@ async def view_pubsub(request: Request):
elif pubsub and node:
iq = await get_node_items(pubsub, node)
if iq:
link = form_a_link(pubsub, node)
xml_atom = generate_rfc_4287(iq, link)
link = form_a_node_link(pubsub, node)
xml_atom = generate_atom(iq, link)
else:
text = 'Please check that PubSub node is valid and accessible.'
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
elif pubsub:
iq = await get_nodes(pubsub)
if iq:
link = 'xmpp:{pubsub}'.format(pubsub=pubsub)
result = pubsub_to_opml(iq)
xml_opml = generate_opml(iq)
result = append_stylesheet(xml_opml, 'opml.xsl')
else:
text = 'Please check that PubSub Jabber ID is valid and accessible.'
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
elif node:
text = 'PubSub parameter is missing.'
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
# else:
# result = ('Mandatory parameter PubSub and '
# 'optional parameter Node are missing.')
else:
text = 'The given domain {} is not allowed.'.format(pubsub)
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
default = get_configuration('default')
if not result:
if default['pubsub'] and default['nodeid']:
@ -127,20 +132,23 @@ async def view_pubsub(request: Request):
pubsub = default['pubsub']
node = default['nodeid']
iq = await get_node_items(pubsub, node)
link = form_a_link(pubsub, node)
link = form_a_node_link(pubsub, node)
xml_atom = generate_rfc_4287(iq, link)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
elif not settings['service']:
pubsub = default['pubsub']
node = default['nodeid']
iq = await get_node_items(pubsub, node)
link = form_a_link(pubsub, node)
link = form_a_node_link(pubsub, node)
xml_atom = generate_rfc_4287(iq, link)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
else:
text = 'Please contact the administrator and ask him to set default PubSub and Node ID.'
xml_atom = error_message(text)
result = append_stylesheet(xml_atom)
result = append_stylesheet(
xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
response = Response(content=result, media_type="application/xml")
return response
@ -166,104 +174,125 @@ async def get_node_items(pubsub, node):
async def get_nodes(pubsub):
    """Ask a PubSub service for the list of nodes it hosts.

    Args:
        pubsub: Jabber ID of the PubSub service to query.

    Returns:
        The result returned by the XEP-0060 plugin's ``get_nodes`` call, or
        ``None`` when the request fails with ``IqError`` or ``IqTimeout``.
    """
    try:
        # Issue the query exactly once and hand its result to the caller;
        # a second, discarded request would only double the round trips.
        return await xmpp.plugin['xep_0060'].get_nodes(pubsub, timeout=5)
    except (IqError, IqTimeout) as e:
        # Best effort: report the failure and let callers see a falsy result.
        print(e)
        return None
def form_a_node_link(pubsub, node):
    """Return the ``xmpp:`` URI that addresses a PubSub node.

    Args:
        pubsub: Jabber ID of the PubSub service.
        node: Name of the node on that service.

    Returns:
        A string of the form ``xmpp:<pubsub>?;node=<node>``. The values are
        inserted verbatim; no escaping is applied.
    """
    return f'xmpp:{pubsub}?;node={node}'


# Former name of this helper, kept so call sites that still use it keep working.
form_a_link = form_a_node_link
def form_an_item_link(pubsub, node, item_id):
    """Return the ``xmpp:`` URI that addresses a single item of a PubSub node.

    Args:
        pubsub: Jabber ID of the PubSub service.
        node: Name of the node on that service.
        item_id: Identifier of the item within the node.

    Returns:
        A string of the form ``xmpp:<pubsub>?;node=<node>;item=<item_id>``.
        The values are inserted verbatim; no escaping is applied.
    """
    return f'xmpp:{pubsub}?;node={node};item={item_id}'
def error_message(text):
    """Build a minimal Atom (RFC 4287) feed whose single entry reports an error.

    Args:
        text: Human-readable description of the error; it becomes the plain
            text content of the entry.

    Returns:
        The feed serialized as a ``str``, without an XML declaration.
    """
    # One timestamp for the whole document so the feed and its entry agree.
    now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    feed = ET.Element('feed')
    feed.set('xmlns', 'http://www.w3.org/2005/Atom')
    ET.SubElement(feed, 'title', {'type': 'text'}).text = 'StreamBurner'
    ET.SubElement(feed, 'subtitle', {'type': 'text'}).text = (
        'XMPP Journal Publisher')
    # An Atom author is a Person construct: name and email are child
    # elements, not attributes.
    author = ET.SubElement(feed, 'author')
    ET.SubElement(author, 'name').text = 'XMPP Journal Publisher'
    ET.SubElement(author, 'email').text = 'xjp@schimon.i2p'
    ET.SubElement(feed, 'generator', {
        'uri': 'https://git.xmpp-it.net/sch/XMPPJournalPublisher',
        'version': '0.1'}).text = 'XMPP Journal Publisher (XJP)'
    ET.SubElement(feed, 'updated').text = now
    entry = ET.SubElement(feed, 'entry')
    ET.SubElement(entry, 'title').text = 'Error'
    ET.SubElement(entry, 'id').text = 'xjp-error'
    ET.SubElement(entry, 'updated').text = now
    ET.SubElement(entry, 'published').text = now
    ET.SubElement(entry, 'content', {'type': 'text'}).text = text
    return ET.tostring(feed, encoding='unicode')
def _child_text(parent, tag):
    """Return the text of the first child of *parent* named *tag*, or None."""
    child = parent.find(tag)
    return None if child is None else child.text


def generate_atom(iq, link):
    """Generate an Atom (RFC 4287) feed from the items of a PubSub node.

    Args:
        iq: Result of a node items request. The code reads ``iq['from'].bare``
            and ``iq['pubsub']['items']``; the latter provides ``['node']`` and
            iterates over items exposing ``['id']`` and ``['payload']``.
        link: URI written as the ``href`` of the feed's ``rel="self"`` link.

    Returns:
        The feed serialized as a ``str``, without an XML declaration. Items
        without an Atom title are left out.
    """
    atom_ns = '{http://www.w3.org/2005/Atom}'
    pubsub = iq['from'].bare
    items = iq['pubsub']['items']
    node = items['node']
    now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    feed = ET.Element('feed')
    feed.set('xmlns', 'http://www.w3.org/2005/Atom')
    ET.SubElement(feed, 'title', {'type': 'text'}).text = node
    ET.SubElement(feed, 'subtitle', {'type': 'text'}).text = (
        'XMPP PubSub Syndication Feed')
    ET.SubElement(feed, 'link', {'rel': 'self', 'href': link})
    ET.SubElement(feed, 'generator', {
        'uri': 'https://git.xmpp-it.net/sch/XMPPJournalPublisher',
        'version': '0.1'}).text = 'XMPP Journal Publisher (XJP)'
    ET.SubElement(feed, 'updated').text = now
    for item in items:
        item_id = item['id']
        item_payload = item['payload']
        # NOTE(review): presumably an item can arrive without a payload;
        # skip it rather than fail on the lookups below - confirm.
        if item_payload is None:
            continue
        title_text = _child_text(item_payload, atom_ns + 'title')
        # The entry link always points back at the PubSub item itself.
        link_href = form_an_item_link(pubsub, node, item_id)
        if not title_text or not link_href:
            continue
        content = item_payload.find(atom_ns + 'content')
        if content is None:
            content_text = 'No content'
            content_type_text = 'text'
        else:
            content_text = content.text
            # Any type mentioning "html" is emitted as html, the rest as text;
            # a missing type attribute counts as text.
            declared_type = content.get('type', 'text')
            content_type_text = 'html' if 'html' in declared_type else 'text'
        published_text = _child_text(item_payload, atom_ns + 'published')
        # An entry needs an update date: fall back to the publication date,
        # then to the time of generation.
        updated_text = (_child_text(item_payload, atom_ns + 'updated')
                        or published_text or now)
        # Without an id in the payload, the item link serves as identifier.
        identifier_text = (_child_text(item_payload, atom_ns + 'id')
                           or link_href)
        entry = ET.SubElement(feed, 'entry')
        ET.SubElement(entry, 'title').text = title_text
        ET.SubElement(entry, 'link', {'href': link_href})
        ET.SubElement(entry, 'id').text = identifier_text
        ET.SubElement(entry, 'updated').text = updated_text
        # Omit the element instead of emitting an empty date.
        if published_text:
            ET.SubElement(entry, 'published').text = published_text
        ET.SubElement(entry, 'content', {
            'type': content_type_text}).text = content_text
    return ET.tostring(feed, encoding='unicode')


# Former name of this function, kept so call sites that still use it keep working.
generate_rfc_4287 = generate_atom
def generate_json(iq, node):
def generate_json(iq):
"""Create a JSON file from node items."""
json_data = []
pubsub = iq['from'].bare
node = iq['pubsub']['items']['node']
entries = iq['pubsub']['items']
for entry in entries:
item = entry['payload']
item_id = entry['id']
item_payload = entry['payload']
namespace = '{http://www.w3.org/2005/Atom}'
title = item.find(namespace + 'title')
title = None if title == None else title.text
title = item_payload.find(namespace + 'title')
title_text = None if title == None else title.text
# updated = item.find(namespace + 'updated')
# updated = None if updated == None else updated.text
# if updated: updated = datetime.datetime.fromisoformat(updated)
link = item.find(namespace + 'link')
link = '' if link == None else link.attrib['href']
json_data_entry = {'title' : title,
'link' : link}
link_href = form_an_item_link(pubsub, node, item_id)
# link = item.find(namespace + 'link')
# link_href = '' if link == None else link.attrib['href']
json_data_entry = {'title' : title_text,
'link' : link_href}
json_data.append(json_data_entry)
if len(json_data) > 6: break
filename = 'data/{}.json'.format(node)
@ -286,15 +315,40 @@ def append_element(xml_data, element, text):
"""Patch function to append XSLT reference to XML"""
"""Why is not this a built-in function of ElementTree or LXML"""
def append_stylesheet(xml_data, filename, namespace=None):
    """Prefix an XML document with a declaration and an XSLT stylesheet reference.

    Args:
        xml_data: The XML document as a string.
        filename: Name of the stylesheet; it is referenced as
            ``xsl/<filename>``.
        namespace: Optional namespace URI to serialize as the default
            namespace of the document.

    Returns:
        A string made of an XML declaration, an ``xml-stylesheet`` processing
        instruction and the re-serialized document.
    """
    # Registering the namespace under the empty prefix makes ElementTree
    # write it as the default namespace instead of inventing an "ns0:"
    # prefix. The registry is global to ElementTree, so it is only touched
    # when a namespace is actually requested.
    if namespace:
        ET.register_namespace('', namespace)
    # Re-serializing through ElementTree drops any XML declaration the input
    # carried, so exactly one is written below.
    tree = ET.fromstring(xml_data)
    body = ET.tostring(tree, encoding='unicode')
    return ('<?xml version="1.0" encoding="utf-8"?>'
            f'<?xml-stylesheet type="text/xsl" href="xsl/{filename}"?>'
            f'{body}')
def generate_opml(iq):
    """Generate an OPML outline listing the nodes of a PubSub service.

    Args:
        iq: Result of a nodes request. The code reads ``iq['from'].bare`` and
            ``iq['disco_items']['items']``, each item being unpacked as a
            ``(jid, node, name)`` triple.

    Returns:
        The OPML document serialized as a ``str``, without an XML
        declaration. Items without a node are left out.
    """
    pubsub = iq['from'].bare
    items = iq['disco_items']['items']
    opml = ET.Element('opml')
    opml.set('version', '1.0')
    head = ET.SubElement(opml, 'head')
    ET.SubElement(head, 'title').text = pubsub
    ET.SubElement(head, 'description').text = (
        'PubSub Nodes of {}').format(pubsub)
    ET.SubElement(head, 'generator').text = 'XMPP Journal Publisher (XJP)'
    ET.SubElement(head, 'urlPublic').text = (
        'https://git.xmpp-it.net/sch/XMPPJournalPublisher')
    time_stamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    ET.SubElement(head, 'dateCreated').text = time_stamp
    ET.SubElement(head, 'dateModified').text = time_stamp
    body = ET.SubElement(opml, 'body')
    # Distinct loop names keep the service address read above intact.
    for jid, node, name in items:
        # An entry without a node has nothing to link to, and a None
        # attribute value would make serialization fail for the whole list.
        if not node:
            continue
        outline = ET.SubElement(body, 'outline')
        outline.set('text', name or node)
        outline.set('xmlUrl', form_a_node_link(jid, node))
    return ET.tostring(opml, encoding='unicode')