How to Parse a BlogSpot XML Backup Copy

1 min read
[1] Get the XML Backup Copy
[2] Write Python code to parse the file
import xml.etree.ElementTree as ET
import html
# Parse an Atom-format blog backup file and print the blog title followed by
# the title, publication date and content of every entry.


def _text_or(elem, default):
    """Return ``elem.text`` or *default* when the element or its text is missing."""
    if elem is not None and elem.text:
        return elem.text
    return default


# Load the XML file
xml_path = "blog-04-07-2025 (1).xml"
tree = ET.parse(xml_path)
root = tree.getroot()

# Define namespaces (prefix -> URI) used by the find()/findall() queries below
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'gd': 'http://schemas.google.com/g/2005',
    'thr': 'http://purl.org/syndication/thread/1.0',
}

# Extract blog title; fall back instead of crashing when the feed has none
blog_title = _text_or(root.find('atom:title', namespaces), "No Title")
print(f"Blog Title: {blog_title}\n")

# Extract all entries
entries = root.findall('atom:entry', namespaces)
print(f"Found {len(entries)} entries.\n")

# Loop through entries
for i, entry in enumerate(entries, start=1):
    title = _text_or(entry.find('atom:title', namespaces), "No Title")
    published = _text_or(entry.find('atom:published', namespaces), "No Date")
    content_elem = entry.find('atom:content', namespaces)

    if content_elem is not None and content_elem.text:
        # Decode HTML character references (e.g. &amp;) left in the text
        content_text = html.unescape(content_elem.text.strip())
        short_preview = content_text[:500] + ("..." if len(content_text) > 500 else "")
        long_preview = content_text
    else:
        # Set BOTH previews: leaving long_preview unset here would raise
        # NameError on the first empty entry, or silently reprint the
        # previous entry's content on later ones.
        short_preview = "No Content"
        long_preview = "No Content"

    print(f"Post {i}:")
    print(f"Title: {title}")
    print(f"Published: {published}")
    print("Content Preview:")
    # Print short_preview instead for a preview truncated to 500 characters.
    print(long_preview)
    print("-" * 80)
Output:
0
Subscribe to my newsletter
Read articles from Mohamad Mahmood directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by

Mohamad Mahmood
Mohamad Mahmood
Mohamad's interest is in Programming (Mobile, Web, Database and Machine Learning). He studies at the Center For Artificial Intelligence Technology (CAIT), Universiti Kebangsaan Malaysia (UKM).