ironhtml_parser/
lib.rs

1//! # ironhtml-parser
2//!
3//! HTML5 parser following the [WHATWG HTML Living Standard](https://html.spec.whatwg.org/).
4//!
5//! This crate provides a parser that converts HTML strings into a DOM tree,
6//! handling malformed HTML gracefully like browsers do.
7//!
8//! ## Features
9//!
10//! - Parse HTML5 documents and fragments
11//! - Handles malformed HTML gracefully
12//! - `no_std` compatible (with alloc)
13//! - Produces a DOM tree that can be traversed and validated
14//!
15//! ## Example
16//!
17//! ```rust
18//! use ironhtml_parser::{parse, parse_fragment, Node};
19//!
20//! // Parse a complete document
21//! let doc = parse("<!DOCTYPE html><html><body><p>Hello</p></body></html>");
22//! assert!(doc.doctype.is_some());
23//!
24//! // Parse a fragment
25//! let nodes = parse_fragment("<div class=\"container\"><span>Text</span></div>");
26//! assert_eq!(nodes.len(), 1);
27//! ```
28//!
29//! ## Specification Reference
30//!
31//! - [HTML Parsing](https://html.spec.whatwg.org/multipage/parsing.html)
32//! - [Tokenization](https://html.spec.whatwg.org/multipage/parsing.html#tokenization)
33//! - [Tree Construction](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction)
34
35#![cfg_attr(not(feature = "std"), no_std)]
36
37#[cfg(feature = "std")]
38extern crate std;
39
40extern crate alloc;
41
42mod dom;
43mod tokenizer;
44mod tree_builder;
45mod validator;
46
47pub use dom::{Attribute, Document, Element, Node, NodeType, Text};
48pub use tokenizer::{Token, Tokenizer};
49pub use tree_builder::TreeBuilder;
50pub use validator::{ValidationError, ValidationResult, Validator};
51
52use alloc::vec::Vec;
53
54/// Parse an HTML document string into a Document.
55///
56/// This handles the full document including doctype, html, head, and body elements.
57///
58/// ## Example
59///
60/// ```rust
61/// use ironhtml_parser::parse;
62///
63/// let doc = parse("<!DOCTYPE html><html><head><title>Test</title></head><body><p>Hello</p></body></html>");
64/// assert!(doc.doctype.is_some());
65/// assert!(!doc.root.children.is_empty());
66/// ```
67#[must_use]
68pub fn parse(html: &str) -> Document {
69    let tokenizer = Tokenizer::new(html);
70    let mut builder = TreeBuilder::new();
71
72    for token in tokenizer {
73        builder.process_token(token);
74    }
75
76    builder.finish()
77}
78
79/// Parse an HTML fragment string into a list of nodes.
80///
81/// This is useful for parsing partial HTML content like template snippets.
82///
83/// ## Example
84///
85/// ```rust
86/// use ironhtml_parser::{parse_fragment, NodeType};
87///
88/// let nodes = parse_fragment("<div><span>Hello</span></div>");
89/// assert_eq!(nodes.len(), 1);
90/// assert_eq!(nodes[0].node_type(), NodeType::Element);
91/// ```
92#[must_use]
93pub fn parse_fragment(html: &str) -> Vec<Node> {
94    let tokenizer = Tokenizer::new(html);
95    let mut builder = TreeBuilder::new();
96    builder.set_fragment_mode(true);
97
98    for token in tokenizer {
99        builder.process_token(token);
100    }
101
102    builder.finish_fragment()
103}
104
105/// Validate an HTML document and return any errors.
106///
107/// ## Example
108///
109/// ```rust
110/// use ironhtml_parser::{parse, validate};
111///
112/// let doc = parse("<img>");
113/// let errors = validate(&doc);
114/// // img without alt attribute would be a validation error
115/// assert!(!errors.is_empty());
116/// ```
117#[must_use]
118pub fn validate(doc: &Document) -> Vec<ValidationError> {
119    let validator = Validator::new();
120    validator.validate(doc)
121}
122
123/// Validate an HTML fragment and return any errors.
124#[must_use]
125pub fn validate_fragment(nodes: &[Node]) -> Vec<ValidationError> {
126    let validator = Validator::new();
127    validator.validate_nodes(nodes)
128}