How to Parse a BlogSpot XML Backup Copy

Mohamad Mahmood
1 min read

[1] Get the XML Backup Copy

[2] Write Python code to parse the file

import xml.etree.ElementTree as ET
import html

# Path to the Blogger XML backup downloaded in step [1]
xml_path = "blog-04-07-2025 (1).xml"

# Namespace prefixes used when querying the Atom feed
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'gd': 'http://schemas.google.com/g/2005',
    'thr': 'http://purl.org/syndication/thread/1.0'
}


def _child_text(parent, tag, default):
    """Return the text of child *tag* under *parent*.

    Falls back to *default* when the child element is missing or has no
    text (e.g. an empty ``<title/>``), so callers never see ``None``.
    """
    elem = parent.find(tag, namespaces)
    if elem is None or not elem.text:
        return default
    return elem.text


def summarize_entry(entry):
    """Return ``(title, published, content)`` for one ``atom:entry`` element.

    Missing or empty fields are replaced by the placeholders "No Title",
    "No Date" and "No Content".  The content is stripped of surrounding
    whitespace and passed through ``html.unescape``.
    """
    title = _child_text(entry, 'atom:title', "No Title")
    published = _child_text(entry, 'atom:published', "No Date")

    # Every entry gets its own content value; a post without content must
    # not fall through to an undefined name or to the previous post's text.
    raw_content = _child_text(entry, 'atom:content', "").strip()
    content = html.unescape(raw_content) if raw_content else "No Content"

    return title, published, content


def main(path=xml_path):
    """Parse the backup at *path* and print the blog title and every entry."""
    # Load the XML file
    tree = ET.parse(path)
    root = tree.getroot()

    # Extract blog title
    blog_title = _child_text(root, 'atom:title', "No Title")
    print(f"Blog Title: {blog_title}\n")

    # Extract all entries
    entries = root.findall('atom:entry', namespaces)
    print(f"Found {len(entries)} entries.\n")

    # Loop through entries
    for i, entry in enumerate(entries, start=1):
        title, published, content = summarize_entry(entry)

        print(f"Post {i}:")
        print(f"Title: {title}")
        print(f"Published: {published}")
        print("Content Preview:")
        print(content)
        print("-" * 80)


if __name__ == "__main__":
    main()

Output:

0
Subscribe to my newsletter

Read articles from Mohamad Mahmood directly inside your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Mohamad Mahmood
Mohamad Mahmood

Mohamad's interest is in Programming (Mobile, Web, Database and Machine Learning). He studies at the Center For Artificial Intelligence Technology (CAIT), Universiti Kebangsaan Malaysia (UKM).