Add OPML support;

Set a new default node (Thank you roughnecks); Improve CSS, JS, XSLT; Stop relying on external libraries to produce syndication feeds.
2024-07-12 15:39:17 +03:00 · 2024-07-12 15:39:17 +03:00 · d1f1edbaca
commit d1f1edbaca
parent e07ff6e838
8 changed files with 519 additions and 185 deletions
--- a/pubsub_to_atom.py
+++ b/pubsub_to_atom.py
@ -2,10 +2,10 @@
 # -*- coding: utf-8 -*-

 import datetime
+from dateutil import parser
 from fastapi import FastAPI, Request, Response
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
-import feedgenerator
 import json
 from slixmpp import ClientXMPP
 from slixmpp.exceptions import IqError, IqTimeout
@ -57,12 +57,11 @@ async def view_pubsub(request: Request):
            if pubsub and node and item_id:
                iq = await get_node_item(pubsub, node, item_id)
                if iq:
-                    link = 'xmpp:{pubsub}?;node={node};item={item}'.format(
-                        pubsub=pubsub, node=node, item=item_id)
-                    xml_atom = generate_rfc_4287(iq, link)
+                    link = form_an_item_link(pubsub, node, item_id)
+                    xml_atom = generate_atom(iq, link)
                    iq = await get_node_items(pubsub, node)
                    if iq:
-                        generate_json(iq, node)
+                        generate_json(iq)
                    else:
                        operator = get_configuration('settings')['operator']
                        json_data = [{'title' : 'Error retrieving items list.',
@ -79,7 +78,8 @@ async def view_pubsub(request: Request):
                else:
                    text = 'Please check that PubSub node and item are valid and accessible.'
                    xml_atom = error_message(text)
-                result = append_stylesheet(xml_atom)
+                result = append_stylesheet(
+                    xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
                
                # try:
                #     iq = await get_node_items(pubsub, node)
@ -94,32 +94,37 @@ async def view_pubsub(request: Request):
            elif pubsub and node:
                iq = await get_node_items(pubsub, node)
                if iq:
-                    link = form_a_link(pubsub, node)
-                    xml_atom = generate_rfc_4287(iq, link)
+                    link = form_a_node_link(pubsub, node)
+                    xml_atom = generate_atom(iq, link)
                else:
                    text = 'Please check that PubSub node is valid and accessible.'
                    xml_atom = error_message(text)
-                result = append_stylesheet(xml_atom)
+                result = append_stylesheet(
+                    xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
            elif pubsub:
                iq = await get_nodes(pubsub)
                if iq:
                    link = 'xmpp:{pubsub}'.format(pubsub=pubsub)
-                    result = pubsub_to_opml(iq)
+                    xml_opml = generate_opml(iq)
+                    result = append_stylesheet(xml_opml, 'opml.xsl')
                else:
                    text = 'Please check that PubSub Jabber ID is valid and accessible.'
                    xml_atom = error_message(text)
-                result = append_stylesheet(xml_atom)
+                    result = append_stylesheet(
+                        xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
            elif node:
                text = 'PubSub parameter is missing.'
                xml_atom = error_message(text)
-                result = append_stylesheet(xml_atom)
+                result = append_stylesheet(
+                    xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
            # else:
            #     result = ('Mandatory parameter PubSub and '
            #               'optional parameter Node are missing.')
        else:
            text = 'The given domain {} is not allowed.'.format(pubsub)
            xml_atom = error_message(text)
-            result = append_stylesheet(xml_atom)
+            result = append_stylesheet(
+                xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
    default = get_configuration('default')
    if not result:
        if default['pubsub'] and default['nodeid']:
@ -127,20 +132,23 @@ async def view_pubsub(request: Request):
                pubsub = default['pubsub']
                node = default['nodeid']
                iq = await get_node_items(pubsub, node)
-                link = form_a_link(pubsub, node)
+                link = form_a_node_link(pubsub, node)
                xml_atom = generate_rfc_4287(iq, link)
-                result = append_stylesheet(xml_atom)
+                result = append_stylesheet(
+                    xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
            elif not settings['service']:
                pubsub = default['pubsub']
                node = default['nodeid']
                iq = await get_node_items(pubsub, node)
-                link = form_a_link(pubsub, node)
+                link = form_a_node_link(pubsub, node)
                xml_atom = generate_rfc_4287(iq, link)
-                result = append_stylesheet(xml_atom)
+                result = append_stylesheet(
+                    xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
        else:
            text = 'Please contact the administrator and ask him to set default PubSub and Node ID.'
            xml_atom = error_message(text)
-            result = append_stylesheet(xml_atom)
+            result = append_stylesheet(
+                xml_atom, 'atom.xsl', namespace='http://www.w3.org/2005/Atom')
    response = Response(content=result, media_type="application/xml")
    return response

@ -166,104 +174,125 @@ async def get_node_items(pubsub, node):

 async def get_nodes(pubsub):
    try:
-      await xmpp.plugin['xep_0060'].get_nodes(pubsub, timeout=5)
+      iq = await xmpp.plugin['xep_0060'].get_nodes(pubsub, timeout=5)
      return iq
    except (IqError, IqTimeout) as e:
      print(e)

-def form_a_link(pubsub, node):
+def form_a_node_link(pubsub, node):
    link = 'xmpp:{pubsub}?;node={node}'.format(pubsub=pubsub, node=node)
    return link

+def form_an_item_link(pubsub, node, item_id):
+    link = 'xmpp:{pubsub}?;node={node};item={item}'.format(
+        pubsub=pubsub, node=node, item=item_id)
+    return link
+
 def error_message(text):
    """Error message in RFC 4287: The Atom Syndication Format."""
-    feed = feedgenerator.Atom1Feed(
-        description = ('This is a syndication feed generated with XMPP Journal '
-                       'Publisher (XJP), which conveys XEP-0060: Publish-'
-                       'Subscribe nodes to standard RFC 4287: The Atom '
-                       'Syndication Format.'),
-        language = 'en',
-        link = '',
-        subtitle = 'XMPP Journal Publisher',
-        title = 'StreamBurner')
-    namespace = '{http://www.w3.org/2005/Atom}'
-    feed_url = 'gemini://schimon.i2p/'
-    # create entry
-    feed.add_item(
-        description = text,
-        # enclosure = feedgenerator.Enclosure(enclosure, enclosure_size, enclosure_type) if args.entry_enclosure else None,
-        link = '',
-        # pubdate = updated,
-        title = 'Error',
-        # unique_id = ''
-        )
-    xml_atom = feed.writeString('utf-8')
-    xml_atom_extended = append_element(
-        xml_atom,
-        'generator',
-        'XMPP Journal Publisher (XJP)')
-    return xml_atom_extended
+    title = 'StreamBurner'
+    subtitle = 'XMPP Journal Publisher'
+    description = ('This is a syndication feed generated with XMPP Journal '
+                   'Publisher, which conveys XEP-0060: Publish-Subscribe '
+                   'nodes to standard RFC 4287: The Atom Syndication Format.')
+    language = 'en'
+    feed = ET.Element("feed")
+    feed.set('xmlns', 'http://www.w3.org/2005/Atom')
+    ET.SubElement(feed, 'title', {'type': 'text'}).text = title
+    ET.SubElement(feed, 'subtitle', {'type': 'text'}).text = subtitle
+    ET.SubElement(feed, 'author', {'name':'XMPP Journal Publisher','email':'xjp@schimon.i2p'})
+    ET.SubElement(feed, 'generator', {
+        'uri': 'https://git.xmpp-it.net/sch/XMPPJournalPublisher',
+        'version': '0.1'}).text = 'XMPP Journal Publisher (XJP)'
+    ET.SubElement(feed, 'updated').text = datetime.datetime.now(datetime.UTC).isoformat()
+    entry = ET.SubElement(feed, 'entry')
+    ET.SubElement(entry, 'title').text = 'Error'
+    ET.SubElement(entry, 'id').text = 'xjp-error'
+    ET.SubElement(entry, 'updated').text = datetime.datetime.now(datetime.UTC).isoformat()
+    ET.SubElement(entry, 'published').text = datetime.datetime.now(datetime.UTC).isoformat()
+    # ET.SubElement(entry, 'summary', {'type': summary_type_text}).text = summary_text
+    ET.SubElement(entry, 'content', {'type': 'text'}).text = text
+    return ET.tostring(feed, encoding='unicode')

-def generate_rfc_4287(iq, link):
-    """Convert XEP-0060: Publish-Subscribe to RFC 4287: The Atom Syndication Format."""
-    feed = feedgenerator.Atom1Feed(
-        description = ('This is a syndication feed generated with PubSub To '
-                       'Atom, which conveys XEP-0060: Publish-Subscribe nodes '
-                       'to standard RFC 4287: The Atom Syndication Format.'),
-        language = iq['pubsub']['items']['lang'],
-        link = link,
-        subtitle = 'XMPP PubSub Syndication Feed',
-        title = iq['pubsub']['items']['node'])
-    # See also iq['pubsub']['items']['substanzas']
-    entries = iq['pubsub']['items']
-    for entry in entries:
-        item = entry['payload']
+# generate_rfc_4287
+def generate_atom(iq, link):
+    """Generate an Atom Syndication Format (RFC 4287) from a Publish-Subscribe (XEP-0060) node items."""
+    pubsub = iq['from'].bare
+    node = iq['pubsub']['items']['node']
+    title = node
+    link = link
+    # link = form_a_node_link(pubsub, node)
+    subtitle = 'XMPP PubSub Syndication Feed'
+    description = ('This is a syndication feed generated with XMPP Journal '
+                   'Publisher, which conveys XEP-0060: Publish-Subscribe '
+                   'nodes to standard RFC 4287: The Atom Syndication Format.')
+    language = iq['pubsub']['items']['lang']
+    items = iq['pubsub']['items']
+    feed = ET.Element("feed")
+    feed.set('xmlns', 'http://www.w3.org/2005/Atom')
+    ET.SubElement(feed, 'title', {'type': 'text'}).text = title
+    ET.SubElement(feed, 'subtitle', {'type': 'text'}).text = subtitle
+    ET.SubElement(feed, 'link', {'rel': 'self', 'href': link})
+    ET.SubElement(feed, 'generator', {
+        'uri': 'https://git.xmpp-it.net/sch/XMPPJournalPublisher',
+        'version': '0.1'}).text = 'XMPP Journal Publisher (XJP)'
+    ET.SubElement(feed, 'updated').text = datetime.datetime.now(datetime.UTC).isoformat()
+    for item in items:
+        item_id = item['id']
+        item_payload = item['payload']
        namespace = '{http://www.w3.org/2005/Atom}'
-        title = item.find(namespace + 'title')
-        title = None if title == None else title.text
-        updated = item.find(namespace + 'updated')
-        updated = None if updated == None else updated.text
-        published = item.find(namespace + 'published')
-        published = None if published == None else published.text
-        if not updated and not published: updated = datetime.datetime.utcnow().isoformat()
-        content = item.find(namespace + 'content')
-        content = 'No content' if content == None else content.text
-        link = item.find(namespace + 'link')
-        link = '' if link == None else link.attrib['href']
-        author = item.find(namespace + 'author')
+        title = item_payload.find(namespace + 'title')
+        title_text = None if title == None else title.text
+        # link = item_payload.find(namespace + 'link')
+        # link_href = '' if link == None else link.attrib['href']
+        link_href = form_an_item_link(pubsub, node, item_id)
+        if not title_text or not link_href: continue
+        content = item_payload.find(namespace + 'content')
+        content_text = 'No content' if content == None else content.text
+        if content.attrib:
+            content_type = content.attrib['type'] if 'type' in content.attrib else 'text'
+            content_type_text = 'html' if 'html' in content_type else 'text'
+        published = item_payload.find(namespace + 'published')
+        published_text = None if published == None else published.text
+        if published: published_dt = parser.parse(published_text)
+        updated = item_payload.find(namespace + 'updated')
+        updated_text = None if updated == None else updated.text
+        author = item_payload.find(namespace + 'author')
        if author and author.attrib: print(author.attrib)
-        author = 'None' if author == None else author.text
-        # create entry
-        feed.add_item(
-            description = content,
-            # enclosure = feedgenerator.Enclosure(enclosure, enclosure_size, enclosure_type) if args.entry_enclosure else None,
-            link = link,
-            pubdate = published or updated,
-            title = title,
-            unique_id = link)
-    xml_atom = feed.writeString('utf-8')
-    xml_atom_extended = append_element(
-        xml_atom,
-        'generator',
-        'XMPP Journal Publisher (XJP)')
-    return xml_atom_extended
+        author_text = 'None' if author == None else author.text
+        identifier = item_payload.find(namespace + 'id')
+        if identifier and identifier.attrib: print(identifier.attrib)
+        identifier_text = 'None' if identifier == None else identifier.text
+        entry = ET.SubElement(feed, 'entry')
+        ET.SubElement(entry, 'title').text = title_text
+        ET.SubElement(entry, 'link', {'href': link_href})
+        ET.SubElement(entry, 'id').text = identifier_text
+        ET.SubElement(entry, 'updated').text = updated_text
+        ET.SubElement(entry, 'published').text = published_text
+        # ET.SubElement(entry, 'summary', {'type': summary_type_text}).text = summary_text
+        ET.SubElement(entry, 'content', {'type': content_type_text}).text = content_text
+    return ET.tostring(feed, encoding='unicode')

-def generate_json(iq, node):
+def generate_json(iq):
    """Create a JSON file from node items."""
    json_data = []
+    pubsub = iq['from'].bare
+    node = iq['pubsub']['items']['node']
    entries = iq['pubsub']['items']
    for entry in entries:
-        item = entry['payload']
+        item_id = entry['id']
+        item_payload = entry['payload']
        namespace = '{http://www.w3.org/2005/Atom}'
-        title = item.find(namespace + 'title')
-        title = None if title == None else title.text
+        title = item_payload.find(namespace + 'title')
+        title_text = None if title == None else title.text
        # updated = item.find(namespace + 'updated')
        # updated = None if updated == None else updated.text
        # if updated: updated = datetime.datetime.fromisoformat(updated)
-        link = item.find(namespace + 'link')
-        link = '' if link == None else link.attrib['href']
-        json_data_entry = {'title' : title,
-                           'link' : link}
+        link_href = form_an_item_link(pubsub, node, item_id)
+        # link = item.find(namespace + 'link')
+        # link_href = '' if link == None else link.attrib['href']
+        json_data_entry = {'title' : title_text,
+                           'link' : link_href}
        json_data.append(json_data_entry)
        if len(json_data) > 6: break
    filename = 'data/{}.json'.format(node)
@ -286,15 +315,40 @@ def append_element(xml_data, element, text):

 """Patch function to append XSLT reference to XML"""
 """Why is not this a built-in function of ElementTree or LXML"""
-def append_stylesheet(xml_data):
+def append_stylesheet(xml_data, filename, namespace=None):
    # Register namespace in order to avoide ns0:
-    ET.register_namespace("", "http://www.w3.org/2005/Atom")
+    if namespace: ET.register_namespace("", namespace)
    # Load XML from string
    tree = ET.fromstring(xml_data)
    # The following direction removes the XML declaration
-    xml_data_no_declaration = ET.tostring(tree, encoding='unicode')
+    xml_data_without_a_declaration = ET.tostring(tree, encoding='unicode')
    # Add XML declaration and stylesheet
-    xml_data_declaration = ('<?xml version="1.0" encoding="utf-8"?>'
-                            '<?xml-stylesheet type="text/xsl" href="xsl/stylesheet.xsl"?>' +
-                            xml_data_no_declaration)
+    xml_data_declaration = (
+        '<?xml version="1.0" encoding="utf-8"?>'
+        '<?xml-stylesheet type="text/xsl" href="xsl/{}"?>'.format(filename) +
+        xml_data_without_a_declaration)
    return xml_data_declaration
+
+def generate_opml(iq):
+    pubsub = iq['from'].bare
+    items = iq['disco_items']['items']
+    opml = ET.Element("opml")
+    opml.set("version", "1.0")
+    head = ET.SubElement(opml, "head")
+    ET.SubElement(head, "title").text = pubsub
+    ET.SubElement(head, "description").text = (
+        "PubSub Nodes of {}").format(pubsub)
+    ET.SubElement(head, "generator").text = "XMPP Journal Publisher (XJP)"
+    ET.SubElement(head, "urlPublic").text = (
+        "https://git.xmpp-it.net/sch/XMPPJournalPublisher")
+    time_stamp = datetime.datetime.now(datetime.UTC).isoformat()
+    ET.SubElement(head, "dateCreated").text = time_stamp
+    ET.SubElement(head, "dateModified").text = time_stamp
+    body = ET.SubElement(opml, "body")
+    for item in items:
+        pubsub, node, title = item
+        uri = form_a_node_link(pubsub, node)
+        outline = ET.SubElement(body, "outline")
+        outline.set("text", title or node)
+        outline.set("xmlUrl", uri)
+    return ET.tostring(opml, encoding='unicode')