1use alloc::string::String;
10use alloc::vec::Vec;
11
12use crate::dom::{Attribute, Comment, Document, DocumentType, Element, Node, Text};
13use crate::tokenizer::Token;
14
15pub struct TreeBuilder {
19 document: Document,
21 open_elements: Vec<usize>,
23 open_element_names: Vec<String>,
25 fragment_mode: bool,
27 insertion_mode: InsertionMode,
29 pending_text: String,
31}
32
/// States of the tree-construction state machine.
///
/// The mode decides how start tags, end tags and text are interpreted and which
/// implied elements (`html`, `head`, `body`) get created on the way.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InsertionMode {
    /// Nothing has been processed yet; the only mode in which a doctype is recorded.
    Initial,
    /// The root `html` element has not been created yet.
    BeforeHtml,
    /// The root exists; waiting for (or about to imply) the `head` element.
    BeforeHead,
    /// Inside `head`.
    InHead,
    /// `head` has been closed; waiting for (or about to imply) the `body` element.
    AfterHead,
    /// Inside `body`; also the mode used for fragment parsing.
    InBody,
    /// A `</body>` or `</html>` end tag was seen while in `InBody`.
    AfterBody,
    /// A `</html>` end tag was seen while in `AfterBody`.
    AfterAfterBody,
}
45
46fn navigate_to_element<'a>(root: &'a mut Element, path: &[usize]) -> &'a mut Element {
51 let mut current = root;
52
53 for &idx in path.iter().skip(1) {
54 if idx < current.children.len() && matches!(current.children[idx], Node::Element(_)) {
55 current = match &mut current.children[idx] {
56 Node::Element(elem) => elem,
57 _ => unreachable!(),
58 };
59 } else {
60 break;
61 }
62 }
63
64 current
65}
66
67impl TreeBuilder {
68 #[must_use]
70 pub fn new() -> Self {
71 Self {
72 document: Document::new(),
73 open_elements: Vec::new(),
74 open_element_names: Vec::new(),
75 fragment_mode: false,
76 insertion_mode: InsertionMode::Initial,
77 pending_text: String::new(),
78 }
79 }
80
81 pub fn set_fragment_mode(&mut self, fragment: bool) {
87 self.fragment_mode = fragment;
88 if fragment {
89 self.insertion_mode = InsertionMode::InBody;
90 self.open_elements.push(0);
92 self.open_element_names.push(String::new());
93 }
94 }
95
96 pub fn process_token(&mut self, token: Token) {
98 match &token {
100 Token::Character(_) => {}
101 _ => self.flush_pending_text(),
102 }
103
104 match token {
105 Token::Doctype {
106 name,
107 public_id,
108 system_id,
109 } => {
110 self.process_doctype(name, public_id, system_id);
111 }
112 Token::StartTag {
113 name,
114 attributes,
115 self_closing,
116 } => {
117 self.process_start_tag(&name, attributes, self_closing);
118 }
119 Token::EndTag { name } => {
120 self.process_end_tag(&name);
121 }
122 Token::Comment(data) => {
123 self.process_comment(data);
124 }
125 Token::Character(c) => {
126 self.pending_text.push(c);
127 }
128 Token::Eof => {
129 self.flush_pending_text();
130 }
131 }
132 }
133
134 fn flush_pending_text(&mut self) {
135 if self.pending_text.is_empty() {
136 return;
137 }
138
139 let text = core::mem::take(&mut self.pending_text);
140
141 if text.chars().all(|c| c.is_ascii_whitespace()) {
143 match self.insertion_mode {
144 InsertionMode::Initial
145 | InsertionMode::BeforeHtml
146 | InsertionMode::BeforeHead
147 | InsertionMode::AfterHead
148 | InsertionMode::AfterBody
149 | InsertionMode::AfterAfterBody => return,
150 _ => {}
151 }
152 }
153
154 self.insert_text(text);
155 }
156
157 fn process_doctype(
158 &mut self,
159 name: Option<String>,
160 public_id: Option<String>,
161 system_id: Option<String>,
162 ) {
163 if self.insertion_mode == InsertionMode::Initial {
164 self.document.doctype = Some(DocumentType {
165 name: name.unwrap_or_default(),
166 public_id,
167 system_id,
168 });
169 self.insertion_mode = InsertionMode::BeforeHtml;
170 }
171 }
172
173 #[allow(clippy::too_many_lines, clippy::only_used_in_recursion)]
174 fn process_start_tag(
175 &mut self,
176 name: &str,
177 attributes: Vec<(String, String)>,
178 self_closing: bool,
179 ) {
180 let name_lower = name.to_ascii_lowercase();
181
182 match self.insertion_mode {
183 InsertionMode::Initial => {
184 self.insertion_mode = InsertionMode::BeforeHtml;
186 self.process_start_tag(name, attributes, self_closing);
187 }
188
189 InsertionMode::BeforeHtml => {
190 if name_lower == "html" {
191 self.create_html_element(attributes);
192 self.insertion_mode = InsertionMode::BeforeHead;
193 } else {
194 self.create_html_element(Vec::new());
196 self.insertion_mode = InsertionMode::BeforeHead;
197 self.process_start_tag(name, attributes, self_closing);
198 }
199 }
200
201 InsertionMode::BeforeHead => {
202 if name_lower == "head" {
203 self.insert_element(&name_lower, attributes);
204 self.insertion_mode = InsertionMode::InHead;
205 } else if name_lower == "html" {
206 for (key, value) in attributes {
208 self.document.root.set_attribute(key, value);
209 }
210 } else {
211 self.insert_element("head", Vec::new());
213 self.insertion_mode = InsertionMode::InHead;
214 self.process_start_tag(name, attributes, self_closing);
215 }
216 }
217
218 InsertionMode::InHead => {
219 match name_lower.as_str() {
220 "meta" | "link" | "base" => {
221 self.insert_element(&name_lower, attributes);
222 self.pop_element();
223 }
224 "title" | "style" | "script" | "noscript" => {
225 self.insert_element(&name_lower, attributes);
226 }
227 "head" => {
228 }
230 "body" => {
231 self.pop_element(); self.insertion_mode = InsertionMode::AfterHead;
233 self.process_start_tag(name, attributes, self_closing);
234 }
235 _ => {
236 self.pop_element();
238 self.insertion_mode = InsertionMode::AfterHead;
239 self.process_start_tag(name, attributes, self_closing);
240 }
241 }
242 }
243
244 InsertionMode::AfterHead => {
245 if name_lower == "body" {
246 self.insert_element(&name_lower, attributes);
247 self.insertion_mode = InsertionMode::InBody;
248 } else if name_lower == "html" {
249 for (key, value) in attributes {
251 self.document.root.set_attribute(key, value);
252 }
253 } else {
254 self.insert_element("body", Vec::new());
256 self.insertion_mode = InsertionMode::InBody;
257 self.process_start_tag(name, attributes, self_closing);
258 }
259 }
260
261 InsertionMode::InBody => {
262 let is_void = matches!(
263 name_lower.as_str(),
264 "area"
265 | "base"
266 | "br"
267 | "col"
268 | "embed"
269 | "hr"
270 | "img"
271 | "input"
272 | "link"
273 | "meta"
274 | "source"
275 | "track"
276 | "wbr"
277 );
278
279 self.insert_element(&name_lower, attributes);
280
281 if is_void {
282 self.pop_element();
283 }
284 }
285
286 InsertionMode::AfterBody => {
287 if name_lower == "html" {
288 for (key, value) in attributes {
290 self.document.root.set_attribute(key, value);
291 }
292 } else {
293 self.insertion_mode = InsertionMode::InBody;
294 self.process_start_tag(name, attributes, self_closing);
295 }
296 }
297
298 InsertionMode::AfterAfterBody => {
299 self.insertion_mode = InsertionMode::InBody;
300 self.process_start_tag(name, attributes, self_closing);
301 }
302 }
303 }
304
305 fn process_end_tag(&mut self, name: &str) {
306 let name_lower = name.to_ascii_lowercase();
307
308 match self.insertion_mode {
309 InsertionMode::InHead => {
310 if name_lower == "head" {
311 self.pop_element();
312 self.insertion_mode = InsertionMode::AfterHead;
313 }
314 }
315
316 InsertionMode::InBody => {
317 if name_lower == "body" || name_lower == "html" {
318 self.insertion_mode = InsertionMode::AfterBody;
319 } else {
320 self.pop_until(&name_lower);
322 }
323 }
324
325 InsertionMode::AfterBody => {
326 if name_lower == "html" {
327 self.insertion_mode = InsertionMode::AfterAfterBody;
328 }
329 }
330
331 _ => {}
332 }
333 }
334
335 fn process_comment(&mut self, data: String) {
336 let comment = Node::Comment(Comment::new(data));
337 self.insert_into_current(comment);
338 }
339
340 fn create_html_element(&mut self, attributes: Vec<(String, String)>) {
341 self.document.root = Element::new("html");
342 for (key, value) in attributes {
343 self.document
344 .root
345 .attributes
346 .push(Attribute::new(key, value));
347 }
348 self.open_elements.push(0); self.open_element_names.push(String::from("html"));
350 }
351
352 fn insert_element(&mut self, tag_name: &str, attributes: Vec<(String, String)>) {
353 let mut element = Element::new(tag_name);
354 for (key, value) in attributes {
355 element.attributes.push(Attribute::new(key, value));
356 }
357
358 let node = Node::Element(element);
359 let idx = self.insert_into_current(node);
360 self.open_elements.push(idx);
361 self.open_element_names.push(String::from(tag_name));
362 }
363
364 fn insert_into_current(&mut self, node: Node) -> usize {
365 let parent = navigate_to_element(&mut self.document.root, &self.open_elements);
366 let idx = parent.children.len();
367 parent.children.push(node);
368 idx
369 }
370
371 fn insert_text(&mut self, text: String) {
372 let text_node = Node::Text(Text::new(text));
373 self.insert_into_current(text_node);
374 }
375
376 fn pop_element(&mut self) {
377 self.open_elements.pop();
378 self.open_element_names.pop();
379 }
380
381 fn pop_until(&mut self, tag_name: &str) {
384 while self.open_element_names.len() > 1 {
385 if self.open_element_names.last().map(String::as_str) == Some(tag_name) {
386 self.open_elements.pop();
387 self.open_element_names.pop();
388 return;
389 }
390 self.open_elements.pop();
391 self.open_element_names.pop();
392 }
393 }
394
395 #[must_use]
397 pub fn finish(mut self) -> Document {
398 self.flush_pending_text();
399 self.document
400 }
401
402 #[must_use]
404 pub fn finish_fragment(mut self) -> Vec<Node> {
405 self.flush_pending_text();
406 self.document.root.children
407 }
408}
409
410impl Default for TreeBuilder {
411 fn default() -> Self {
412 Self::new()
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizer::Tokenizer;

    /// Parses `html` as a complete document.
    fn parse(html: &str) -> Document {
        let mut builder = TreeBuilder::new();
        for token in Tokenizer::new(html) {
            builder.process_token(token);
        }
        builder.finish()
    }

    /// Parses `html` as a fragment and returns its top-level nodes.
    fn parse_fragment(html: &str) -> Vec<Node> {
        let mut builder = TreeBuilder::new();
        builder.set_fragment_mode(true);
        for token in Tokenizer::new(html) {
            builder.process_token(token);
        }
        builder.finish_fragment()
    }

    // ---- basic document and fragment parsing ----

    #[test]
    fn test_simple_document() {
        let doc = parse(
            "<!DOCTYPE html><html><head><title>Test</title>\
             </head><body><p>Hello</p></body></html>",
        );
        assert!(doc.doctype.is_some());
        assert_eq!(doc.doctype.as_ref().unwrap().name, "html");
        assert_eq!(doc.root.tag_name, "html");
    }

    #[test]
    fn test_implicit_html() {
        let doc = parse("<p>Hello</p>");
        assert_eq!(doc.root.tag_name, "html");
        assert!(doc.body().is_some());
    }

    #[test]
    fn test_fragment() {
        let nodes = parse_fragment("<div><span>Hello</span></div>");
        assert_eq!(nodes.len(), 1);
        // Unwrap instead of `if let` so a wrong node kind fails the test.
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.tag_name, "div");
    }

    #[test]
    fn test_text_content() {
        let doc = parse("<p>Hello World</p>");
        let body = doc.body().unwrap();
        let p = body.find_element("p").unwrap();
        assert_eq!(p.text_content(), Some("Hello World".into()));
    }

    #[test]
    fn test_attributes() {
        let nodes = parse_fragment(r#"<div class="container" id="main"></div>"#);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.get_attribute("class"), Some("container"));
        assert_eq!(div.get_attribute("id"), Some("main"));
    }

    #[test]
    fn test_pop_until_nested() {
        let nodes = parse_fragment("<div><span>Hello</span> World</div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.tag_name, "div");
        assert_eq!(div.children.len(), 2);
        let span = div.children[0].as_element().unwrap();
        assert_eq!(span.tag_name, "span");
        assert_eq!(span.text_content(), Some("Hello".into()));
    }

    #[test]
    fn test_deeply_nested_fragment() {
        let nodes = parse_fragment("<div><ul><li><span>Deep</span></li></ul></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        let ul = div.find_element("ul").unwrap();
        let li = ul.find_element("li").unwrap();
        let span = li.find_element("span").unwrap();
        assert_eq!(span.text_content(), Some("Deep".into()));
    }

    #[test]
    fn test_fragment_void_elements() {
        let nodes = parse_fragment("<div><br><span>After</span></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.children.len(), 2);
        // `br` is void: it must not swallow the following `span`.
        let br = div.children[0].as_element().unwrap();
        assert_eq!(br.tag_name, "br");
        assert!(br.children.is_empty());
    }

    #[test]
    fn test_fragment_multiple_top_level() {
        let nodes = parse_fragment("<p>One</p><p>Two</p><p>Three</p>");
        assert_eq!(nodes.len(), 3);
    }

    #[test]
    fn test_many_children_fragment() {
        use core::fmt::Write;
        let mut html = String::from("<div>");
        for i in 0..1100 {
            let _ = write!(html, "<span>{i}</span>");
        }
        html.push_str("</div>");
        let nodes = parse_fragment(&html);
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.children.len(), 1100);
    }

    #[test]
    fn test_unmatched_end_tag() {
        let nodes = parse_fragment("<div>Hello</span></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.tag_name, "div");
    }

    // ---- end-tag matching ----

    #[test]
    fn test_pop_until_skips_intermediate() {
        let nodes = parse_fragment("<div><span><em>Text</div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.tag_name, "div");
        let span = div.find_element("span").unwrap();
        let em = span.find_element("em").unwrap();
        assert_eq!(em.text_content(), Some("Text".into()));
    }

    #[test]
    fn test_pop_until_no_match_preserves_root() {
        let nodes = parse_fragment("<div><p>Hello</p></nonexistent></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.tag_name, "div");
    }

    #[test]
    fn test_pop_until_closes_correct_level() {
        let nodes = parse_fragment("<div><div><span>Inner</span></div><span>Outer</span></div>");
        assert_eq!(nodes.len(), 1);
        let outer = nodes[0].as_element().unwrap();
        assert_eq!(outer.tag_name, "div");
        assert_eq!(outer.children.len(), 2);
        let inner = outer.children[0].as_element().unwrap();
        assert_eq!(inner.tag_name, "div");
        assert_eq!(
            inner.find_element("span").unwrap().text_content(),
            Some("Inner".into())
        );
        let outer_span = outer.children[1].as_element().unwrap();
        assert_eq!(outer_span.tag_name, "span");
        assert_eq!(outer_span.text_content(), Some("Outer".into()));
    }

    #[test]
    fn test_pop_until_multiple_same_tag() {
        let nodes = parse_fragment("<div><span><span><span>Deep</span>Mid</span>Top</span></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        let s1 = div.find_element("span").unwrap();
        let s2 = s1.find_element("span").unwrap();
        let s3 = s2.find_element("span").unwrap();
        assert_eq!(s3.text_content(), Some("Deep".into()));
        assert!(s2.children.len() >= 2);
        assert!(s1.children.len() >= 2);
    }

    // ---- fragment structure ----

    #[test]
    fn test_fragment_five_levels_deep() {
        let nodes = parse_fragment(
            "<div><section><article><header><h1>Title</h1>\
             </header></article></section></div>",
        );
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        let section = div.find_element("section").unwrap();
        let article = section.find_element("article").unwrap();
        let header = article.find_element("header").unwrap();
        let h1 = header.find_element("h1").unwrap();
        assert_eq!(h1.text_content(), Some("Title".into()));
    }

    #[test]
    fn test_fragment_text_at_every_level() {
        let nodes = parse_fragment("<div>A<span>B<em>C</em>D</span>E</div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.children.len(), 3);
        assert_eq!(div.children[0].as_text().unwrap().data, "A");
        let span = div.children[1].as_element().unwrap();
        assert_eq!(span.children.len(), 3);
        assert_eq!(span.children[0].as_text().unwrap().data, "B");
        let em = span.children[1].as_element().unwrap();
        assert_eq!(em.text_content(), Some("C".into()));
        assert_eq!(span.children[2].as_text().unwrap().data, "D");
        assert_eq!(div.children[2].as_text().unwrap().data, "E");
    }

    #[test]
    fn test_fragment_siblings_with_children() {
        let nodes = parse_fragment("<ul><li>One<em>!</em></li><li>Two</li><li>Three</li></ul>");
        assert_eq!(nodes.len(), 1);
        let ul = nodes[0].as_element().unwrap();
        assert_eq!(ul.children.len(), 3);
        let li1 = ul.children[0].as_element().unwrap();
        assert_eq!(li1.children.len(), 2);
        assert_eq!(li1.children[0].as_text().unwrap().data, "One");
        assert_eq!(
            li1.children[1].as_element().unwrap().text_content(),
            Some("!".into())
        );
        let li2 = ul.children[1].as_element().unwrap();
        assert_eq!(li2.text_content(), Some("Two".into()));
        let li3 = ul.children[2].as_element().unwrap();
        assert_eq!(li3.text_content(), Some("Three".into()));
    }

    // ---- void elements ----

    #[test]
    fn test_fragment_multiple_void_elements() {
        let nodes = parse_fragment("<div><br><hr><img><input></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.children.len(), 4);
        assert_eq!(div.children[0].as_element().unwrap().tag_name, "br");
        assert_eq!(div.children[1].as_element().unwrap().tag_name, "hr");
        assert_eq!(div.children[2].as_element().unwrap().tag_name, "img");
        assert_eq!(div.children[3].as_element().unwrap().tag_name, "input");
        for child in &div.children {
            assert!(child.as_element().unwrap().children.is_empty());
        }
    }

    #[test]
    fn test_fragment_void_between_text() {
        let nodes = parse_fragment("<p>Before<br>After</p>");
        assert_eq!(nodes.len(), 1);
        let p = nodes[0].as_element().unwrap();
        assert_eq!(p.children.len(), 3);
        assert_eq!(p.children[0].as_text().unwrap().data, "Before");
        assert_eq!(p.children[1].as_element().unwrap().tag_name, "br");
        assert_eq!(p.children[2].as_text().unwrap().data, "After");
    }

    #[test]
    fn test_fragment_void_with_attributes() {
        let nodes = parse_fragment(r#"<div><img src="a.png" alt="test"><br></div>"#);
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        let img = div.children[0].as_element().unwrap();
        assert_eq!(img.get_attribute("src"), Some("a.png"));
        assert_eq!(img.get_attribute("alt"), Some("test"));
        assert!(img.children.is_empty());
    }

    #[test]
    fn test_fragment_void_nested_inside() {
        let nodes = parse_fragment("<table><tr><td><input></td></tr></table>");
        assert_eq!(nodes.len(), 1);
        let table = nodes[0].as_element().unwrap();
        let tr = table.find_element("tr").unwrap();
        let td = tr.find_element("td").unwrap();
        let input = td.find_element("input").unwrap();
        assert!(input.children.is_empty());
    }

    // ---- comments ----

    #[test]
    fn test_fragment_comment_top_level() {
        let nodes = parse_fragment("<!-- top --><div>Hi</div>");
        assert_eq!(nodes.len(), 2);
        match &nodes[0] {
            Node::Comment(c) => assert_eq!(c.data, " top "),
            _ => panic!("expected the first node to be a comment"),
        }
        assert_eq!(nodes[1].as_element().unwrap().tag_name, "div");
    }

    #[test]
    fn test_fragment_comment_inside_element() {
        let nodes = parse_fragment("<div><!-- inside --></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.children.len(), 1);
        assert!(matches!(div.children[0], Node::Comment(_)));
    }

    #[test]
    fn test_fragment_comment_between_elements() {
        let nodes = parse_fragment("<ul><li>A</li><!-- sep --><li>B</li></ul>");
        assert_eq!(nodes.len(), 1);
        let ul = nodes[0].as_element().unwrap();
        assert_eq!(ul.children.len(), 3);
        assert_eq!(ul.children[0].as_element().unwrap().tag_name, "li");
        assert!(matches!(ul.children[1], Node::Comment(_)));
        assert_eq!(ul.children[2].as_element().unwrap().tag_name, "li");
    }

    // ---- text handling ----

    #[test]
    fn test_fragment_text_only() {
        let nodes = parse_fragment("Just text");
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].as_text().unwrap().data, "Just text");
    }

    #[test]
    fn test_fragment_mixed_top_level() {
        let nodes = parse_fragment("Hello <em>world</em> and <strong>more</strong>!");
        assert_eq!(nodes.len(), 5);
        assert_eq!(nodes[0].as_text().unwrap().data, "Hello ");
        assert_eq!(nodes[1].as_element().unwrap().tag_name, "em");
        assert_eq!(nodes[2].as_text().unwrap().data, " and ");
        assert_eq!(nodes[3].as_element().unwrap().tag_name, "strong");
        assert_eq!(nodes[4].as_text().unwrap().data, "!");
    }

    #[test]
    fn test_fragment_empty() {
        let nodes = parse_fragment("");
        assert!(nodes.is_empty());
    }

    #[test]
    fn test_fragment_whitespace_only() {
        // Whitespace is kept in body mode, which is where fragments are parsed.
        let nodes = parse_fragment(" ");
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].as_text().unwrap().data, " ");
    }

    // ---- malformed input ----

    #[test]
    fn test_malformed_only_end_tags() {
        let nodes = parse_fragment("</div></span></p>");
        assert!(nodes.is_empty());
    }

    #[test]
    fn test_malformed_extra_end_tags() {
        let nodes = parse_fragment("<div>Hello</div></div></div></div>");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        assert_eq!(div.text_content(), Some("Hello".into()));
    }

    #[test]
    fn test_malformed_unclosed_tags() {
        let nodes = parse_fragment("<div><span><em>Text");
        assert_eq!(nodes.len(), 1);
        let div = nodes[0].as_element().unwrap();
        let span = div.find_element("span").unwrap();
        let em = span.find_element("em").unwrap();
        assert_eq!(em.text_content(), Some("Text".into()));
    }

    #[test]
    fn test_malformed_interleaved_tags() {
        let nodes = parse_fragment("<b><i>Text</b>After</i>");
        assert!(!nodes.is_empty());
        let b = nodes[0].as_element().unwrap();
        assert_eq!(b.tag_name, "b");
    }

    #[test]
    fn test_malformed_deeply_mismatched() {
        let nodes = parse_fragment("<a><b><c><d><e>Text</a>");
        assert_eq!(nodes.len(), 1);
        let a = nodes[0].as_element().unwrap();
        assert_eq!(a.tag_name, "a");
        assert!(a.find_element("e").is_some());
    }

    // ---- full documents ----

    #[test]
    fn test_document_head_elements() {
        let doc = parse(
            r#"<!DOCTYPE html><html><head>
                <title>Test</title>
                <meta charset="utf-8">
                <link rel="stylesheet" href="style.css">
            </head><body></body></html>"#,
        );
        let head = doc.head().unwrap();
        assert!(head.find_element("title").is_some());
        assert!(head.find_element("meta").is_some());
        assert!(head.find_element("link").is_some());
    }

    #[test]
    fn test_document_implicit_body() {
        let doc = parse("<html><head></head><div>Content</div></html>");
        let body = doc.body().unwrap();
        let div = body.find_element("div").unwrap();
        assert_eq!(div.text_content(), Some("Content".into()));
    }

    #[test]
    fn test_document_implicit_head_and_body() {
        let doc = parse("<div>Content</div>");
        assert_eq!(doc.root.tag_name, "html");
        assert!(doc.head().is_some());
        assert!(doc.body().is_some());
        let body = doc.body().unwrap();
        let div = body.find_element("div").unwrap();
        assert_eq!(div.text_content(), Some("Content".into()));
    }

    #[test]
    fn test_document_title() {
        let doc = parse(
            "<!DOCTYPE html><html><head><title>Hello World</title></head>\
             <body></body></html>",
        );
        assert_eq!(doc.title(), Some(String::from("Hello World")));
    }

    #[test]
    fn test_document_round_trip() {
        let html = "<!DOCTYPE html><html><head><title>Test</title></head>\
                    <body><p>Hello</p></body></html>";
        let doc = parse(html);
        let output = doc.to_html();
        // Re-parsing the serialised output must yield the same title and body text.
        let doc2 = parse(&output);
        assert_eq!(doc2.title(), Some(String::from("Test")));
        let body = doc2.body().unwrap();
        let p = body.find_element("p").unwrap();
        assert_eq!(p.text_content(), Some("Hello".into()));
    }
}