Add file PyProject;

Support display of a single pubsub node item; Update document README; Modularize code;
2024-11-17 17:30:38 +02:00 · 2024-11-17 17:30:38 +02:00 · 5e495579c2
commit 5e495579c2
parent 37aa7e8f40
32 changed files with 2431 additions and 2059 deletions
--- a/jabbercard/utilities/xml.py
+++ b/jabbercard/utilities/xml.py
@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import xml.etree.ElementTree as ET
+
+class Syndication:
+    """Extract plain dictionaries from vCard 4.0 and Atom XML elements.
+
+    All methods are static; call them on the class, for example
+    ``Syndication.extract_atom_items(entry)``.
+    """
+
+    _VCARD_NS = '{urn:ietf:params:xml:ns:vcard-4.0}'
+    _ATOM_NS = '{http://www.w3.org/2005/Atom}'
+
+    @staticmethod
+    def _child_texts(parent, path):
+        """Return the non-blank texts of the children of ``parent.find(path)``.
+
+        Args:
+            parent (Element): Element to search from.
+            path (str): ElementTree path of the wanted element.
+
+        Returns:
+            list: Text of every child whose text is not empty or
+            whitespace-only, in document order. Empty when the element is
+            absent or has no such child.
+        """
+        element = parent.find(path)
+        if element is None:
+            return []
+        return [child.text for child in element
+                if child.text and child.text.strip()]
+
+    @staticmethod
+    def extract_vcard_items(xml_data):
+        """Extract all items from a vCard XML tree.
+
+        Every element except the ``vcard`` root is visited, at any depth.
+        The value stored for an element is its own text followed by the
+        text of its direct children, joined with single spaces and stripped.
+        Elements sharing a local name overwrite each other; the last one
+        in document order wins.
+
+        Args:
+            xml_data (ElementTree or Element): The vCard XML.
+
+        Returns:
+            dict: Local tag name (namespace removed) mapped to the joined
+            text, or to None when neither the element nor its children
+            carry any text.
+        """
+        items = {}
+        for item in xml_data.iter():
+            tag = item.tag
+            # Comments and processing instructions have a non-string tag.
+            if not isinstance(tag, str):
+                continue
+
+            # Skip the root element (vcard)
+            if tag == Syndication._VCARD_NS + 'vcard':
+                continue
+
+            # Drop the '{namespace}' prefix; a tag without one is kept whole
+            # instead of raising IndexError.
+            item_name = tag.rpartition('}')[2]
+
+            # Own text first, then the text of each direct child.
+            texts = [node.text for node in (item, *item) if node.text]
+            items[item_name] = ' '.join(texts).strip() if texts else None
+
+        return items
+
+    @staticmethod
+    def extract_vcard4_items(xml_data):
+        """Extract the commonly displayed fields of a vCard 4.0 element.
+
+        Only direct children of ``xml_data`` are inspected. A field value is
+        the text of the first child of the field element (``<text>``,
+        ``<uri>``, ...) that is not blank.
+
+        Args:
+            xml_data (Element): The ``vcard`` element.
+
+        Returns:
+            dict: Keys ``alias``, ``email``, ``fn``, ``note``, ``org``,
+            ``impp`` and ``url`` hold a string, which is ``''`` when the
+            field is absent or empty. Key ``extras`` maps each ``group``
+            URL type (``'?'`` when no type is given) to a list of
+            ``{'label': ..., 'uri': ...}`` dicts.
+        """
+        namespace = Syndication._VCARD_NS
+        child_texts = Syndication._child_texts
+
+        def first_text(name):
+            # A missing or childless field yields '' rather than leaving
+            # the value undefined.
+            texts = child_texts(xml_data, namespace + name)
+            return texts[0] if texts else ''
+
+        def last_text(group, path, default):
+            # Inside a group the last child wins, so that in
+            # <url><parameters/><uri/></url> the <uri> text is picked.
+            texts = child_texts(group, path)
+            return texts[-1] if texts else default
+
+        type_path = (namespace + 'url/' + namespace + 'parameters/' +
+                     namespace + 'type')
+
+        vcard = {'extras': {}}
+        for group in xml_data.findall(namespace + 'group'):
+            # Every part of a group is optional; absent parts fall back to
+            # a default instead of raising or reusing the previous group.
+            category = last_text(group, type_path, '?')
+            label = last_text(group, namespace + 'x-ablabel', '')
+            uri = last_text(group, namespace + 'url', '')
+            vcard['extras'].setdefault(category, []).append(
+                {'label': label, 'uri': uri})
+
+        vcard['alias'] = first_text('nickname')
+        vcard['email'] = first_text('email')
+        vcard['fn'] = first_text('fn')
+        vcard['note'] = first_text('note')
+        vcard['org'] = first_text('org')
+        vcard['impp'] = first_text('impp')
+        vcard['url'] = first_text('url')
+        return vcard
+
+    @staticmethod
+    def extract_atom_items(xml_data, limit=False):
+        """Extract the displayed fields of an Atom ``entry`` element.
+
+        Args:
+            xml_data (Element): The Atom entry; its fields are looked up as
+                direct children.
+            limit (bool): When true, stop collecting tags once five have
+                been gathered.
+
+        Returns:
+            dict or None: None when the entry has neither a ``title`` nor a
+            ``link`` element. Otherwise a dict with the string keys
+            ``content``, ``href``, ``published``, ``summary``, ``title``
+            and ``updated`` (``''`` when absent) and the list ``tags``.
+            ``updated`` falls back to ``published`` when the entry has no
+            ``updated`` text.
+        """
+        # NOTE
+        # `.//` was not needed when the node item payload was passed directly.
+        # Now that the item is saved as XML, `.//` may be required.
+        # Perhaps navigating a level down (i.e. to "child"), or removing the
+        # root from the file would solve this.
+        #namespace = './/{http://www.w3.org/2005/Atom}'
+        namespace = Syndication._ATOM_NS
+
+        def text_of(name):
+            # Text of the first element called `name`, '' when absent/empty.
+            element = xml_data.find(namespace + name)
+            return '' if element is None else element.text or ''
+
+        def first_nonempty(values):
+            return next((value for value in values if value), '')
+
+        links = xml_data.findall(namespace + 'link')
+        # Compare with None explicitly: the truth value of an Element
+        # depends on whether it has children.
+        if xml_data.find(namespace + 'title') is None and not links:
+            return None
+
+        link_href = first_nonempty(
+            link.attrib.get('href', '') for link in links)
+        content_text = first_nonempty(
+            content.text
+            for content in xml_data.findall(namespace + 'content'))
+        summary_text = first_nonempty(
+            summary.text
+            for summary in xml_data.findall(namespace + 'summary'))
+        published_text = text_of('published')
+        updated_text = text_of('updated') or published_text
+
+        tags = []
+        for category in xml_data.findall(namespace + 'category'):
+            category_term = category.attrib.get('term')
+            # Overlong terms are dropped.
+            if category_term and len(category_term) < 50:
+                tags.append(category_term)
+                if limit and len(tags) > 4: break
+
+        # TODO Check the Blasta database for instances.
+
+        entry = {'content' : content_text,
+                 'href' : link_href,
+                 'published' : published_text,
+                 'summary' : summary_text,
+                 'tags' : tags,
+                 'title' : text_of('title'),
+                 'updated' : updated_text}
+        return entry
+
+
+
+