# test_lxml.py
"""Tests to ensure that the lxml tree builder generates good trees."""
  2. import warnings
  3. try:
  4. import lxml.etree
  5. LXML_PRESENT = True
  6. LXML_VERSION = lxml.etree.LXML_VERSION
  7. except ImportError as e:
  8. LXML_PRESENT = False
  9. LXML_VERSION = (0,)
  10. if LXML_PRESENT:
  11. from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
  12. from bs4 import BeautifulStoneSoup
  13. from bs4.testing import skipIf
  14. from bs4.testing import (
  15. HTMLTreeBuilderSmokeTest,
  16. XMLTreeBuilderSmokeTest,
  17. SoupTest,
  18. skipIf,
  19. )
  20. @skipIf(
  21. not LXML_PRESENT,
  22. "lxml seems not to be present, not testing its tree builder.")
  23. class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
  24. """See ``HTMLTreeBuilderSmokeTest``."""
  25. @property
  26. def default_builder(self):
  27. return LXMLTreeBuilder()
  28. def test_out_of_range_entity(self):
  29. self.assertSoupEquals(
  30. "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
  31. self.assertSoupEquals(
  32. "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
  33. self.assertSoupEquals(
  34. "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
  35. # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
  36. # test if an old version of lxml is installed.
  37. @skipIf(
  38. not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
  39. "Skipping doctype test for old version of lxml to avoid segfault.")
  40. def test_empty_doctype(self):
  41. soup = self.soup("<!DOCTYPE>")
  42. doctype = soup.contents[0]
  43. self.assertEqual("", doctype.strip())
  44. def test_beautifulstonesoup_is_xml_parser(self):
  45. # Make sure that the deprecated BSS class uses an xml builder
  46. # if one is installed.
  47. with warnings.catch_warnings(record=True) as w:
  48. soup = BeautifulStoneSoup("<b />")
  49. self.assertEqual("<b/>", str(soup.b))
  50. self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
  51. @skipIf(
  52. not LXML_PRESENT,
  53. "lxml seems not to be present, not testing its XML tree builder.")
  54. class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
  55. """See ``HTMLTreeBuilderSmokeTest``."""
  56. @property
  57. def default_builder(self):
  58. return LXMLTreeBuilderForXML()