Convert HTML to Markdown in JavaScript for Projects with Astro and Tailwind CSS

Originally posted on: https://lexingtonthemes.com/blog/posts/copy-page-content-as-markdown/

Creating modern websites with Astro and Tailwind CSS? This JavaScript utility offers an elegant solution for transforming HTML content into pristine Markdown format. Whether you’re developing an Astro blog, preparing content for AI tools like ChatGPT and Claude, or transferring content across platforms, this guide demonstrates how to extract webpage content as beautifully formatted Markdown.

Test the button above this section and paste the clipboard content into your Markdown editor to witness the transformation.

The Case for HTML-to-Markdown Conversion in Astro and Tailwind Development

When developing Astro websites with Tailwind CSS, you frequently encounter content that requires:

  • Preparation for AI interactions (ChatGPT, Claude, Gemini)

  • Transfer between content management platforms

  • Transformation from HTML to Markdown for documentation purposes

  • Integration with headless CMS solutions

  • Distribution to clients in an accessible format

Standard HTML becomes unwieldy and filled with CSS classes, particularly when utilizing Tailwind’s utility-first methodology. Markdown delivers a streamlined, readable format that’s ideal for contemporary web development processes.

The JavaScript Approach for Astro Development

This compact JavaScript converter integrates flawlessly with Astro components and Tailwind CSS styling. No external libraries needed — pure JavaScript that harmonizes perfectly with your Astro development workflow.

Configuring the HTML Framework in Your Astro Component

Begin by incorporating the conversion button into your Astro component:

<!-- Trigger button: the script finds it via id="copy-markdown" and rewrites its text and class attribute to show copy status. -->
<button id="copy-markdown" class="px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600">
  Copy page Markdown for LLMs
</button>

This button utilizes Tailwind CSS classes for appearance and will initiate the HTML-to-Markdown transformation.

The Full JavaScript Solution

Here’s the production-ready JavaScript code that functions excellently with Astro’s client-side scripting:

/**
 * HTML to Markdown converter - Perfect for Astro projects.
 *
 * Parses an HTML string and renders it as Markdown. Supported elements:
 * headings (h1-h6), paragraphs, bold/italic, inline code, fenced code blocks
 * (with a language hint taken from a `language-*` / `lang-*` class), links,
 * images, ordered/unordered/nested lists, blockquotes, `<br>` and `<hr>`.
 * Other elements contribute only their text content; tables are not converted
 * to Markdown tables.
 *
 * Known limitation: the final clean-up collapses runs of three or more
 * newlines everywhere, including inside fenced code blocks.
 *
 * @param {string} html - HTML markup to convert; null/undefined is treated as "".
 * @returns {string} The Markdown text, trimmed, with at most one blank line in a row.
 */
function htmlToMarkdown(html) {
  // HTML's own definition of collapsible whitespace (deliberately excludes NBSP).
  const HTML_SPACE_RUN = /[ \t\n\r\f]+/g;
  const ONLY_HTML_SPACE = /^[ \t\n\r\f]*$/;
  // Elements whose text is never page content.
  const SKIPPED_TAGS = new Set(["script", "style", "noscript", "template"]);
  // Elements that start on their own line; used for whitespace and separation rules.
  const BLOCK_TAGS = new Set([
    "address", "article", "aside", "blockquote", "dd", "details", "div", "dl",
    "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2",
    "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "ol", "p",
    "pre", "section", "table", "tr", "ul",
  ]);

  // A <template> parses into an inert fragment: unlike a detached <div>,
  // it does not re-fetch images or run inline event handlers found in the markup.
  const template = document.createElement("template");
  template.innerHTML = String(html ?? "");

  const tagOf = (node) =>
    node && node.nodeType === Node.ELEMENT_NODE ? node.tagName.toLowerCase() : "";
  const isBlock = (node) => BLOCK_TAGS.has(tagOf(node));
  // Surround block output with blank lines so neighbours never run together.
  const asBlock = (text) => (text === "" ? "" : `\n\n${text}\n\n`);

  // Put emphasis markers directly against the text: "**bold **" does not render as bold.
  function wrapInline(content, marker) {
    const core = content.trim();
    if (core === "") {
      return content;
    }
    const start = content.indexOf(core);
    const lead = content.slice(0, start);
    const tail = content.slice(start + core.length);
    return `${lead}${marker}${core}${marker}${tail}`;
  }

  // Read the language hint from a `language-xxx` / `lang-xxx` class on <pre> or its <code> child.
  function codeLanguage(pre) {
    const codeChildren = Array.from(pre.childNodes).filter((n) => tagOf(n) === "code");
    for (const candidate of [pre, ...codeChildren]) {
      const match = /(?:^|\s)lang(?:uage)?-([\w+#.-]+)/.exec(candidate.getAttribute("class") || "");
      if (match) {
        return match[1];
      }
    }
    return "";
  }

  // Prefix one list item and indent its continuation lines (nested lists, extra paragraphs).
  function formatItem(body, marker) {
    const pad = " ".repeat(marker.length);
    return marker + body.trim().replace(/\n(?=[^\n])/g, `\n${pad}`);
  }

  // Render a <ul>/<ol>; numbering honours the `start` attribute and counts only <li> children.
  function renderList(list, ordered) {
    const start = ordered ? Number.parseInt(list.getAttribute("start") || "", 10) : 1;
    let counter = Number.isNaN(start) ? 1 : start;
    const lines = [];
    for (const child of Array.from(list.childNodes)) {
      if (tagOf(child) === "li") {
        const marker = ordered ? `${counter}. ` : "- ";
        counter += 1;
        lines.push(formatItem(renderChildren(child, false), marker));
      } else {
        // Keep any stray non-<li> content instead of dropping it; skip pure whitespace.
        const stray = processNode(child).trim();
        if (stray !== "") {
          lines.push(stray);
        }
      }
    }
    return lines.length === 0 ? "" : `\n${lines.join("\n")}\n\n`;
  }

  // Render all children of `parent`, dropping formatting-only whitespace around blocks.
  function renderChildren(parent, inPre) {
    const nodes = Array.from(parent.childNodes);
    const inlineParent = tagOf(parent) !== "" && !isBlock(parent);
    return nodes
      .map((child, index) => {
        const isText = child.nodeType === Node.TEXT_NODE;
        if (inPre || !isText || !ONLY_HTML_SPACE.test(child.textContent || "")) {
          return processNode(child, inPre);
        }
        if (!child.textContent) {
          return "";
        }
        // Whitespace-only text: it is source indentation next to a block or at
        // the edge of a block container, and a real word gap between inline nodes.
        const before = nodes[index - 1];
        const after = nodes[index + 1];
        const atEdge = before === undefined || after === undefined;
        if (isBlock(before) || isBlock(after) || (atEdge && !inlineParent)) {
          return "";
        }
        return " ";
      })
      .join("");
  }

  // Render a single node; `inPre` means "inside code": keep text verbatim, ignore markup.
  function processNode(node, inPre = false) {
    if (node.nodeType === Node.TEXT_NODE) {
      const text = node.textContent || "";
      return inPre ? text : text.replace(HTML_SPACE_RUN, " ");
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return ""; // comments and other non-content nodes
    }

    const element = node;
    const tagName = tagOf(element);
    if (SKIPPED_TAGS.has(tagName)) {
      return "";
    }
    if (tagName === "br") {
      return "\n";
    }
    if (inPre) {
      return renderChildren(element, true);
    }
    if (tagName === "hr") {
      return "\n\n---\n\n";
    }
    if (tagName === "img") {
      const alt = element.getAttribute("alt") || "";
      const src = element.getAttribute("src") || "";
      return src === "" ? alt : `![${alt}](${src})`;
    }
    if (tagName === "pre") {
      // Trailing newlines would otherwise leave an empty line before the closing fence.
      const code = renderChildren(element, true).replace(/\n+$/, "");
      return asBlock(`\`\`\`${codeLanguage(element)}\n${code}\n\`\`\``);
    }
    if (tagName === "code") {
      const code = renderChildren(element, true).replace(HTML_SPACE_RUN, " ");
      // Code containing a backtick needs a longer delimiter.
      return code.includes("`") ? `\`\` ${code} \`\`` : `\`${code}\``;
    }
    if (tagName === "ul" || tagName === "ol") {
      return renderList(element, tagName === "ol");
    }

    const content = renderChildren(element, false);

    const heading = /^h([1-6])$/.exec(tagName);
    if (heading) {
      const title = content.trim();
      return title === "" ? "" : asBlock(`${"#".repeat(Number(heading[1]))} ${title}`);
    }

    switch (tagName) {
      case "p":
        return asBlock(content.trim());
      case "strong":
      case "b":
        return wrapInline(content, "**");
      case "em":
      case "i":
        return wrapInline(content, "*");
      case "a": {
        const href = element.getAttribute("href") || "";
        return `[${content}](${href})`;
      }
      case "li":
        // Only reached for an <li> outside <ul>/<ol> (renderList handles the rest);
        // emit it as a bullet rather than losing its content.
        return `${formatItem(content, "- ")}\n`;
      case "blockquote": {
        const inner = content.trim().replace(/\n{3,}/g, "\n\n");
        if (inner === "") {
          return "";
        }
        // Every line needs the marker, including blank lines between paragraphs.
        const quoted = inner
          .split("\n")
          .map((line) => (line === "" ? ">" : `> ${line}`))
          .join("\n");
        return asBlock(quoted);
      }
      default:
        // Unknown block containers (div, section, ...) still separate their content.
        return isBlock(element) ? asBlock(content.trim()) : content;
    }
  }

  return renderChildren(template.content, false).replace(/\n{3,}/g, "\n\n").trim();
}
// Astro-compatible event listener
/**
 * Wire the #copy-markdown button so a click converts the page content to
 * Markdown and writes it to the clipboard.
 *
 * Content is read from the first descendant of #markdown-content whose class
 * attribute contains "prose", or from #markdown-content itself when there is
 * no such descendant. Logs an error and does nothing if either element is
 * missing.
 */
function initCopyMarkdownButton() {
  const button = document.getElementById("copy-markdown");
  const contentDiv = document.getElementById("markdown-content");
  if (!button || !contentDiv) {
    console.error("Missing button or content div.");
    return;
  }

  // Remember the button's own label and classes so feedback can be undone
  // without hard-coding a copy of the markup here.
  const idleLabel = button.textContent;
  const idleClassName = button.className;
  let resetTimer;

  // Show a temporary status, then restore the idle look after 2 seconds.
  // Clearing the previous timer keeps rapid clicks from resetting too early.
  const showStatus = (label, className) => {
    clearTimeout(resetTimer);
    button.textContent = label;
    button.className = className;
    resetTimer = setTimeout(() => {
      button.textContent = idleLabel;
      button.className = idleClassName;
    }, 2000);
  };

  button.addEventListener("click", async () => {
    try {
      // Works with Astro's component structure
      const proseWrapper = contentDiv.querySelector('[class*="prose"]');
      const htmlContent = (proseWrapper ?? contentDiv).innerHTML;
      // Convert HTML to clean Markdown
      const markdownContent = htmlToMarkdown(htmlContent);
      await navigator.clipboard.writeText(markdownContent);
      // Tailwind CSS classes for visual feedback
      showStatus("Copied!", "px-4 py-2 bg-green-500 text-white rounded");
    } catch (err) {
      console.error("Failed to copy:", err);
      showStatus("Error copying", "px-4 py-2 bg-red-500 text-white rounded");
    }
  });
}

// If the script runs after the DOM is already parsed, DOMContentLoaded has
// fired and will not fire again, so initialise immediately in that case.
if (document.readyState === "loading") {
  document.addEventListener("DOMContentLoaded", initCopyMarkdownButton, { once: true });
} else {
  initCopyMarkdownButton();
}

Understanding the Astro Integration Mechanics

Astro Component Harmony

The converter is engineered to function smoothly with Astro’s component framework:

  1. Client-side processing: Waits for the DOMContentLoaded event (via document.addEventListener) so the button and content exist before the click handler is attached

  2. Component identification: Locates content within Astro component containers

  3. Prose recognition: Automatically identifies Tailwind’s prose classes

  4. Error management: Logs a console error and exits early if the button or content container is missing

Tailwind CSS Enhancement

The script incorporates Tailwind CSS refinements:

  • Utility class elimination: Excludes Tailwind utility classes from output

  • Prose wrapper identification: Focuses on content within prose classes

  • Visual confirmation: Employs Tailwind classes for button status updates

  • Responsive compatibility: Functions with Tailwind’s responsive utilities

HTML Element Coverage for Astro Development

Ideal for Astro blog content and Tailwind CSS styled elements:

  • Headers (h1-h6) → Clean Markdown headings

  • Paragraphs → Appropriately spaced text sections

  • Bold/Italic → Markdown text formatting

  • Code sections → Inline code and fenced code blocks (highlighting markup is stripped, the code text is kept)

  • Hyperlinks → Working Markdown links

  • Lists → Bullet points and numbered sequences

  • Blockquotes → Quote styling

  • Images → Alt text retention

Enhanced Capabilities for Astro Developers

Content Island Compatibility

Functions with Astro’s Islands Architecture:

  • Handles server-rendered content

  • Manages client-side components

  • Preserves formatting across islands

Tailwind CSS Enhancement

Eliminates unnecessary styling for cleaner output:

  • Removes utility classes

  • Maintains semantic content

  • Preserves responsive breakpoints within content

Ideal for Contemporary Web Development Processes

AI-Enhanced Development

Crucial for developers utilizing AI tools with Astro:

  • ChatGPT integration: Clean content for AI processing

  • Claude interactions: Properly structured text

  • GitHub Copilot: Enhanced code recommendations

  • Content creation: AI-compatible input formatting

Content Management Solutions

Optimizes Astro CMS processes:

  • Headless CMS transfer: Simplified content migration

  • Static site creation: Clean Markdown for builds

  • Blog development: Writer-friendly formatting

  • Documentation: Developer-oriented docs

Integration with Common Astro Configurations

Astro + Tailwind CSS

---
// Perfect for Astro components
// Example page for the copy script:
// - id="markdown-content" is the container the script looks up with getElementById.
// - The prose classes sit on that container itself, so the script's search for a
//   descendant whose class contains "prose" finds nothing and it converts the
//   container's own innerHTML instead.
// - The page also needs the #copy-markdown button (not shown here); without it
//   the script logs an error and returns.
import Layout from '../layouts/Layout.astro';
---
<Layout title="My Blog Post">
  <div id="markdown-content" class="prose prose-lg mx-auto">
    <h1>My Astro Blog Post</h1>
    <p class="text-gray-600">Clean content extraction</p>
  </div>
</Layout>

Astro + MDX

Functions flawlessly with MDX in Astro projects:

  • Transforms MDX output to clean Markdown

  • Maintains component structure

  • Preserves frontmatter compatibility

Astro + Content Collections

Excellent for Astro’s Content Collections:

  • Extract content for collection handling

  • Create clean Markdown files

  • Ensure consistent formatting

Performance Enhancements for Astro

Bundle Efficiency

  • No dependencies: Zero external libraries

  • Tree-shakeable: Contains only essential code

  • Astro-optimized: Compatible with Astro’s build system

Runtime Efficiency

  • Streamlined DOM parsing: Minimal memory consumption

  • Rapid conversion: Optimized for substantial content

  • Non-intrusive: Doesn’t disrupt Astro hydration

Development Guidelines for Astro Projects

  1. Component Organization: Enclose content in a div with id="markdown-content"

  2. Prose Implementation: Use Tailwind’s prose utilities for styling

  3. Error Prevention: Always verify required elements exist

  4. User Experience: Deliver clear visual feedback with Tailwind classes

  5. Mobile Compatibility: Ensure button functionality on touch devices

Summary

This HTML-to-Markdown converter represents a vital utility for contemporary Astro and Tailwind CSS developers. It connects styled HTML content with clean Markdown output, making it invaluable for AI processes, content migration, and documentation projects.

The lightweight, dependency-free approach integrates effortlessly with Astro’s framework while using Tailwind’s utility classes for a refined user experience. Whether you’re constructing a blog, documentation site, or content platform with Astro, this converter will optimize your workflow and enhance your productivity.

0
Subscribe to my newsletter

Read articles from Michael Andreuzza directly in your inbox. Subscribe to the newsletter, and don't miss out.

Written by

Michael Andreuzza
Michael Andreuzza

↳ Building: http://lexingtonthemes.com