# css.py
  1. """Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
  2. # We don't use soupsieve
  3. soupsieve = None
  4. class CSS(object):
  5. """A proxy object against the soupsieve library, to simplify its
  6. CSS selector API.
  7. Acquire this object through the .css attribute on the
  8. BeautifulSoup object, or on the Tag you want to use as the
  9. starting point for a CSS selector.
  10. The main advantage of doing this is that the tag to be selected
  11. against doesn't need to be explicitly specified in the function
  12. calls, since it's already scoped to a tag.
  13. """
  14. def __init__(self, tag, api=soupsieve):
  15. """Constructor.
  16. You don't need to instantiate this class yourself; instead,
  17. access the .css attribute on the BeautifulSoup object, or on
  18. the Tag you want to use as the starting point for your CSS
  19. selector.
  20. :param tag: All CSS selectors will use this as their starting
  21. point.
  22. :param api: A plug-in replacement for the soupsieve module,
  23. designed mainly for use in tests.
  24. """
  25. if api is None:
  26. raise NotImplementedError(
  27. "Cannot execute CSS selectors because the soupsieve package is not installed."
  28. )
  29. self.api = api
  30. self.tag = tag
  31. def escape(self, ident):
  32. """Escape a CSS identifier.
  33. This is a simple wrapper around soupselect.escape(). See the
  34. documentation for that function for more information.
  35. """
  36. if soupsieve is None:
  37. raise NotImplementedError(
  38. "Cannot escape CSS identifiers because the soupsieve package is not installed."
  39. )
  40. return self.api.escape(ident)
  41. def _ns(self, ns, select):
  42. """Normalize a dictionary of namespaces."""
  43. if not isinstance(select, self.api.SoupSieve) and ns is None:
  44. # If the selector is a precompiled pattern, it already has
  45. # a namespace context compiled in, which cannot be
  46. # replaced.
  47. ns = self.tag._namespaces
  48. return ns
  49. def _rs(self, results):
  50. """Normalize a list of results to a Resultset.
  51. A ResultSet is more consistent with the rest of Beautiful
  52. Soup's API, and ResultSet.__getattr__ has a helpful error
  53. message if you try to treat a list of results as a single
  54. result (a common mistake).
  55. """
  56. # Import here to avoid circular import
  57. from bs4.element import ResultSet
  58. return ResultSet(None, results)
  59. def compile(self, select, namespaces=None, flags=0, **kwargs):
  60. """Pre-compile a selector and return the compiled object.
  61. :param selector: A CSS selector.
  62. :param namespaces: A dictionary mapping namespace prefixes
  63. used in the CSS selector to namespace URIs. By default,
  64. Beautiful Soup will use the prefixes it encountered while
  65. parsing the document.
  66. :param flags: Flags to be passed into Soup Sieve's
  67. soupsieve.compile() method.
  68. :param kwargs: Keyword arguments to be passed into SoupSieve's
  69. soupsieve.compile() method.
  70. :return: A precompiled selector object.
  71. :rtype: soupsieve.SoupSieve
  72. """
  73. return self.api.compile(
  74. select, self._ns(namespaces, select), flags, **kwargs
  75. )
  76. def select_one(self, select, namespaces=None, flags=0, **kwargs):
  77. """Perform a CSS selection operation on the current Tag and return the
  78. first result.
  79. This uses the Soup Sieve library. For more information, see
  80. that library's documentation for the soupsieve.select_one()
  81. method.
  82. :param selector: A CSS selector.
  83. :param namespaces: A dictionary mapping namespace prefixes
  84. used in the CSS selector to namespace URIs. By default,
  85. Beautiful Soup will use the prefixes it encountered while
  86. parsing the document.
  87. :param flags: Flags to be passed into Soup Sieve's
  88. soupsieve.select_one() method.
  89. :param kwargs: Keyword arguments to be passed into SoupSieve's
  90. soupsieve.select_one() method.
  91. :return: A Tag, or None if the selector has no match.
  92. :rtype: bs4.element.Tag
  93. """
  94. return self.api.select_one(
  95. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  96. )
  97. def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
  98. """Perform a CSS selection operation on the current Tag.
  99. This uses the Soup Sieve library. For more information, see
  100. that library's documentation for the soupsieve.select()
  101. method.
  102. :param selector: A string containing a CSS selector.
  103. :param namespaces: A dictionary mapping namespace prefixes
  104. used in the CSS selector to namespace URIs. By default,
  105. Beautiful Soup will pass in the prefixes it encountered while
  106. parsing the document.
  107. :param limit: After finding this number of results, stop looking.
  108. :param flags: Flags to be passed into Soup Sieve's
  109. soupsieve.select() method.
  110. :param kwargs: Keyword arguments to be passed into SoupSieve's
  111. soupsieve.select() method.
  112. :return: A ResultSet of Tag objects.
  113. :rtype: bs4.element.ResultSet
  114. """
  115. if limit is None:
  116. limit = 0
  117. return self._rs(
  118. self.api.select(
  119. select, self.tag, self._ns(namespaces, select), limit, flags,
  120. **kwargs
  121. )
  122. )
  123. def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
  124. """Perform a CSS selection operation on the current Tag.
  125. This uses the Soup Sieve library. For more information, see
  126. that library's documentation for the soupsieve.iselect()
  127. method. It is the same as select(), but it returns a generator
  128. instead of a list.
  129. :param selector: A string containing a CSS selector.
  130. :param namespaces: A dictionary mapping namespace prefixes
  131. used in the CSS selector to namespace URIs. By default,
  132. Beautiful Soup will pass in the prefixes it encountered while
  133. parsing the document.
  134. :param limit: After finding this number of results, stop looking.
  135. :param flags: Flags to be passed into Soup Sieve's
  136. soupsieve.iselect() method.
  137. :param kwargs: Keyword arguments to be passed into SoupSieve's
  138. soupsieve.iselect() method.
  139. :return: A generator
  140. :rtype: types.GeneratorType
  141. """
  142. return self.api.iselect(
  143. select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
  144. )
  145. def closest(self, select, namespaces=None, flags=0, **kwargs):
  146. """Find the Tag closest to this one that matches the given selector.
  147. This uses the Soup Sieve library. For more information, see
  148. that library's documentation for the soupsieve.closest()
  149. method.
  150. :param selector: A string containing a CSS selector.
  151. :param namespaces: A dictionary mapping namespace prefixes
  152. used in the CSS selector to namespace URIs. By default,
  153. Beautiful Soup will pass in the prefixes it encountered while
  154. parsing the document.
  155. :param flags: Flags to be passed into Soup Sieve's
  156. soupsieve.closest() method.
  157. :param kwargs: Keyword arguments to be passed into SoupSieve's
  158. soupsieve.closest() method.
  159. :return: A Tag, or None if there is no match.
  160. :rtype: bs4.Tag
  161. """
  162. return self.api.closest(
  163. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  164. )
  165. def match(self, select, namespaces=None, flags=0, **kwargs):
  166. """Check whether this Tag matches the given CSS selector.
  167. This uses the Soup Sieve library. For more information, see
  168. that library's documentation for the soupsieve.match()
  169. method.
  170. :param: a CSS selector.
  171. :param namespaces: A dictionary mapping namespace prefixes
  172. used in the CSS selector to namespace URIs. By default,
  173. Beautiful Soup will pass in the prefixes it encountered while
  174. parsing the document.
  175. :param flags: Flags to be passed into Soup Sieve's
  176. soupsieve.match() method.
  177. :param kwargs: Keyword arguments to be passed into SoupSieve's
  178. soupsieve.match() method.
  179. :return: True if this Tag matches the selector; False otherwise.
  180. :rtype: bool
  181. """
  182. return self.api.match(
  183. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  184. )
  185. def filter(self, select, namespaces=None, flags=0, **kwargs):
  186. """Filter this Tag's direct children based on the given CSS selector.
  187. This uses the Soup Sieve library. It works the same way as
  188. passing this Tag into that library's soupsieve.filter()
  189. method. More information, for more information see the
  190. documentation for soupsieve.filter().
  191. :param namespaces: A dictionary mapping namespace prefixes
  192. used in the CSS selector to namespace URIs. By default,
  193. Beautiful Soup will pass in the prefixes it encountered while
  194. parsing the document.
  195. :param flags: Flags to be passed into Soup Sieve's
  196. soupsieve.filter() method.
  197. :param kwargs: Keyword arguments to be passed into SoupSieve's
  198. soupsieve.filter() method.
  199. :return: A ResultSet of Tag objects.
  200. :rtype: bs4.element.ResultSet
  201. """
  202. return self._rs(
  203. self.api.filter(
  204. select, self.tag, self._ns(namespaces, select), flags, **kwargs
  205. )
  206. )