[1] Get the XML Backup Copy

[2] Write Python code to parse the file

import xml.etree.ElementTree as ET
import html

# Parse a Blogger/BlogSpot Atom XML backup and print every entry it contains:
# title, publication timestamp, and the (HTML-unescaped) content.

# Load the XML file
xml_path = "blog-04-07-2025 (1).xml"
tree = ET.parse(xml_path)
root = tree.getroot()

# Define namespaces
namespaces = {
    'atom': 'http://www.w3.org/2005/Atom',
    'gd': 'http://schemas.google.com/g/2005',
    'thr': 'http://purl.org/syndication/thread/1.0'
}

# Maximum number of characters kept in the short preview.
PREVIEW_LIMIT = 500
# True prints each entry's full content; False prints the truncated preview.
SHOW_FULL_CONTENT = True


def _text_or(elem, default):
    """Return ``elem.text``, or ``default`` if the element or its text is missing.

    ``Element.find`` returns ``None`` when nothing matches, and ``.text`` is
    ``None`` for an empty element such as ``<title/>``; both cases fall back
    to ``default`` instead of raising or printing the literal ``None``.
    """
    if elem is None or not elem.text:
        return default
    return elem.text


# Extract blog title
blog_title = _text_or(root.find('atom:title', namespaces), "No Title")
print(f"Blog Title: {blog_title}\n")

# Extract all entries
entries = root.findall('atom:entry', namespaces)
print(f"Found {len(entries)} entries.\n")

# Loop through entries
for i, entry in enumerate(entries, start=1):
    title = _text_or(entry.find('atom:title', namespaces), "No Title")
    published = _text_or(entry.find('atom:published', namespaces), "No Date")
    # Strip before testing so whitespace-only content counts as missing.
    raw_content = _text_or(entry.find('atom:content', namespaces), "").strip()

    if raw_content:
        content_text = html.unescape(raw_content)
        short_preview = content_text[:PREVIEW_LIMIT] + (
            "..." if len(content_text) > PREVIEW_LIMIT else ""
        )
        long_preview = content_text
    else:
        # Assign BOTH previews: leaving long_preview unset here would raise
        # NameError on the first content-less entry, or silently reprint the
        # previous entry's content on later ones.
        short_preview = long_preview = "No Content"

    print(f"Post {i}:")
    print(f"Title: {title}")
    print(f"Published: {published}")
    print("Content Preview:")
    print(long_preview if SHOW_FULL_CONTENT else short_preview)
    print("-" * 80)

Output:

How to Parse a BlogSpot XML Backup Copy

[1] Get the XML Backup Copy

[2] Write Python code to parse the file

Subscribe to my newsletter

Mohamad Mahmood

Mohamad Mahmood