Add file PyProject;
Support display of a single pubsub node item; Update document README; Modularize code;
This commit is contained in:
parent
37aa7e8f40
commit
5e495579c2
32 changed files with 2431 additions and 2059 deletions
220
jabbercard/utilities/xml.py
Normal file
220
jabbercard/utilities/xml.py
Normal file
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Clark-notation namespace prefixes used by the extractors below.
_VCARD4_NS = '{urn:ietf:params:xml:ns:vcard-4.0}'
_ATOM_NS = '{http://www.w3.org/2005/Atom}'


def _first_child_text(element, default=''):
    """Return the text of the first child of *element* with non-blank text.

    Args:
        element: An Element, or None when the lookup found nothing.
        default: Value returned when *element* is None or no child
            carries non-blank text.

    Returns:
        The child's text exactly as stored (not stripped), or *default*.
    """
    if element is None:
        return default
    for child in element:
        text = child.text
        # Whitespace-only text is indentation from pretty-printed XML,
        # not a value; skip it so the real value element is found.
        if text and text.strip():
            return text
    return default


def _element_text(element):
    """Return *element*'s own text, or '' when it is None or empty."""
    if element is None:
        return ''
    return element.text or ''


def _first_nonempty_text(elements):
    """Return the first non-empty text among *elements*, or ''."""
    for element in elements:
        if element.text:
            return element.text
    return ''


class Syndication:
    """Helpers that flatten vCard 4 and Atom XML into plain dictionaries."""

    @staticmethod
    def extract_vcard_items(xml_data):
        """Extract all items from a vCard XML tree.

        Every element of the tree (at any depth) except the vCard 4 root
        element is visited. Its key is the tag name without the namespace;
        its value is its own text joined by spaces with the text of its
        direct children. Elements sharing a tag name overwrite each other,
        the last one visited wins.

        Args:
            xml_data (ElementTree | Element): The vCard XML.

        Returns:
            dict: Item names mapped to their text content, or to None when
            neither the element nor its direct children carry text.
        """
        items = {}
        for item in xml_data.iter():
            tag = item.tag
            # Comments and processing instructions have a non-string tag.
            if not isinstance(tag, str):
                continue
            # Skip the root element (vcard).
            if tag == _VCARD4_NS + 'vcard':
                continue
            # Drop the '{namespace}' prefix; tags without a namespace are
            # kept as they are instead of raising IndexError.
            item_name = tag.rpartition('}')[2]
            texts = [item.text] if item.text else []
            texts.extend(child.text for child in item if child.text)
            items[item_name] = ' '.join(texts).strip() if texts else None
        return items

    @staticmethod
    def extract_vcard4_items(xml_data):
        """Extract the commonly displayed fields of a vCard 4 document.

        Args:
            xml_data (ElementTree | Element): The vCard XML; fields are
                looked up as direct children of its root.

        Returns:
            dict: Keys 'alias', 'email', 'fn', 'note', 'org', 'impp' and
            'url' hold the first non-blank child text of the matching
            element ('' when missing). Key 'extras' maps a category (the
            url type parameter of a group, '?' when absent) to a list of
            {'label': ..., 'uri': ...} dictionaries, one per group element.
        """
        namespace = _VCARD4_NS
        type_path = (namespace + 'url/' + namespace + 'parameters/' +
                     namespace + 'type')

        extras = {}
        for group in xml_data.findall(namespace + 'group'):
            # Each value is resolved per group, so a group that lacks a
            # label or url yields '' instead of raising or inheriting the
            # values of the previous group.
            label = _first_child_text(group.find(namespace + 'x-ablabel'))
            uri = _first_child_text(group.find(namespace + 'url'))
            category = _first_child_text(group.find(type_path), default='?')
            extras.setdefault(category, []).append(
                {'label': label, 'uri': uri})

        vcard = {'extras': extras}
        # Result key -> vCard element name.
        fields = (('alias', 'nickname'),
                  ('email', 'email'),
                  ('fn', 'fn'),
                  ('note', 'note'),
                  ('org', 'org'),
                  ('impp', 'impp'),
                  ('url', 'url'))
        for key, tag in fields:
            vcard[key] = _first_child_text(xml_data.find(namespace + tag))
        return vcard

    @staticmethod
    def extract_atom_items(xml_data, limit=False):
        """Extract the displayed fields of a single Atom entry.

        Args:
            xml_data (ElementTree | Element): The Atom entry; elements are
                looked up as direct children of its root.
            limit (bool): When true, stop collecting tags once five have
                been gathered.

        Returns:
            dict | None: None when the entry has neither a title nor a
            link. Otherwise a dictionary with the keys 'content', 'href',
            'published', 'summary', 'tags', 'title' and 'updated'. Text
            values are '' when missing; 'tags' is a list of category terms
            shorter than 50 characters; 'updated' falls back to the
            published date when the entry has no updated element.
        """
        # NOTE
        # `.//` was not needed when node item payload was passed directly.
        # Now that item is saved as xml, it is required to use `.//`.
        # Perhaps navigating a level down (i.e. to "child"), or removing
        # the root from the file would solve this.
        #namespace = './/{http://www.w3.org/2005/Atom}'
        namespace = _ATOM_NS

        title = xml_data.find(namespace + 'title')
        links = xml_data.findall(namespace + 'link')
        if title is None and not links:
            return None

        # First link that actually carries a non-empty href.
        link_href = ''
        for link in links:
            link_href = link.attrib.get('href', '')
            if link_href:
                break

        tags = []
        for category in xml_data.findall(namespace + 'category'):
            category_term = category.attrib.get('term')
            # Overly long terms are not useful as tags.
            if category_term and len(category_term) < 50:
                tags.append(category_term)
            if limit and len(tags) > 4:
                break

        published_text = _element_text(xml_data.find(namespace + 'published'))
        updated_text = _element_text(xml_data.find(namespace + 'updated'))

        # TODO Check the Blasta database for instances.

        entry = {
            'content': _first_nonempty_text(
                xml_data.findall(namespace + 'content')),
            'href': link_href,
            'published': published_text,
            'summary': _first_nonempty_text(
                xml_data.findall(namespace + 'summary')),
            'tags': tags,
            'title': _element_text(title),
            'updated': updated_text or published_text,
        }
        return entry
|
||||
|
||||
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue