123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- """Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
# soupsieve is intentionally not imported in this copy of the module.
# With no default backend, CSS() raises NotImplementedError unless a
# soupsieve-compatible object is passed explicitly through ``api``.
soupsieve = None


class CSS(object):
    """A proxy object against the soupsieve library, to simplify its
    CSS selector API.

    Acquire this object through the .css attribute on the
    BeautifulSoup object, or on the Tag you want to use as the
    starting point for a CSS selector.

    The main advantage of doing this is that the tag to be selected
    against doesn't need to be explicitly specified in the function
    calls, since it's already scoped to a tag.
    """

    def __init__(self, tag, api=soupsieve):
        """Constructor.

        You don't need to instantiate this class yourself; instead,
        access the .css attribute on the BeautifulSoup object, or on
        the Tag you want to use as the starting point for your CSS
        selector.

        :param tag: All CSS selectors will use this as their starting
            point.
        :param api: A plug-in replacement for the soupsieve module,
            designed mainly for use in tests.
        :raise NotImplementedError: If no backend is available
            (``api`` is None).
        """
        if api is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )
        self.api = api
        self.tag = tag

    def escape(self, ident):
        """Escape a CSS identifier.

        This is a simple wrapper around the backend's escape()
        function (soupsieve.escape() when soupsieve is the backend).
        See the documentation for that function for more information.

        :param ident: The identifier to escape.
        :return: Whatever the backend's escape() returns for ``ident``.
        :raise NotImplementedError: If this object has no backend.
        """
        # Check the backend this object was actually built with, not the
        # module-level default: a caller may have injected a working
        # backend through the constructor's ``api`` argument.
        if self.api is None:
            raise NotImplementedError(
                "Cannot escape CSS identifiers because the soupsieve package is not installed."
            )
        return self.api.escape(ident)

    def _ns(self, ns, select):
        """Normalize a dictionary of namespaces.

        :param ns: The namespace mapping supplied by the caller, or None.
        :param select: The selector: a string or a precompiled pattern.
        :return: ``ns`` unchanged, unless it is None and ``select`` is
            not a precompiled pattern, in which case the namespaces
            recorded on the starting tag are returned.
        """
        # A precompiled pattern already has a namespace context compiled
        # in, which cannot be replaced, so the tag's namespaces are only
        # used as a fallback for selectors that are not precompiled.
        if not isinstance(select, self.api.SoupSieve) and ns is None:
            ns = self.tag._namespaces
        return ns

    def _rs(self, results):
        """Normalize a list of results to a ResultSet.

        A ResultSet is more consistent with the rest of Beautiful
        Soup's API, and ResultSet.__getattr__ has a helpful error
        message if you try to treat a list of results as a single
        result (a common mistake).

        :param results: The results produced by the backend.
        :return: A ResultSet wrapping ``results``.
        """
        # Import here to avoid circular import
        from bs4.element import ResultSet
        return ResultSet(None, results)

    def compile(self, select, namespaces=None, flags=0, **kwargs):
        """Pre-compile a selector and return the compiled object.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.compile() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.compile() method.
        :return: A precompiled selector object.
        :rtype: soupsieve.SoupSieve
        """
        return self.api.compile(
            select, self._ns(namespaces, select), flags, **kwargs
        )

    def select_one(self, select, namespaces=None, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag and return the
        first result.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select_one()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select_one() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select_one() method.
        :return: A Tag, or None if the selector has no match.
        :rtype: bs4.element.Tag
        """
        return self.api.select_one(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.select()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is normalized to 0 before being passed to the backend.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.select() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        if limit is None:
            limit = 0
        return self._rs(
            self.api.select(
                select, self.tag, self._ns(namespaces, select), limit, flags,
                **kwargs
            )
        )

    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
        """Perform a CSS selection operation on the current Tag.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.iselect()
        method. It is the same as select(), but it returns a generator
        instead of a list.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param limit: After finding this number of results, stop looking.
            None is normalized to 0 before being passed to the backend.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.iselect() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.iselect() method.
        :return: A generator
        :rtype: types.GeneratorType
        """
        # Treat None the same way select() does, so both methods accept
        # the same ``limit`` values.
        if limit is None:
            limit = 0
        return self.api.iselect(
            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
        )

    def closest(self, select, namespaces=None, flags=0, **kwargs):
        """Find the Tag closest to this one that matches the given selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.closest()
        method.

        :param select: A string containing a CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.closest() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.closest() method.
        :return: A Tag, or None if there is no match.
        :rtype: bs4.Tag
        """
        return self.api.closest(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def match(self, select, namespaces=None, flags=0, **kwargs):
        """Check whether this Tag matches the given CSS selector.

        This uses the Soup Sieve library. For more information, see
        that library's documentation for the soupsieve.match()
        method.

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.match() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.match() method.
        :return: True if this Tag matches the selector; False otherwise.
        :rtype: bool
        """
        return self.api.match(
            select, self.tag, self._ns(namespaces, select), flags, **kwargs
        )

    def filter(self, select, namespaces=None, flags=0, **kwargs):
        """Filter this Tag's direct children based on the given CSS selector.

        This uses the Soup Sieve library. It works the same way as
        passing this Tag into that library's soupsieve.filter()
        method. For more information, see the documentation for
        soupsieve.filter().

        :param select: A CSS selector.
        :param namespaces: A dictionary mapping namespace prefixes
            used in the CSS selector to namespace URIs. By default,
            Beautiful Soup will pass in the prefixes it encountered while
            parsing the document.
        :param flags: Flags to be passed into Soup Sieve's
            soupsieve.filter() method.
        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.filter() method.
        :return: A ResultSet of Tag objects.
        :rtype: bs4.element.ResultSet
        """
        return self._rs(
            self.api.filter(
                select, self.tag, self._ns(namespaces, select), flags, **kwargs
            )
        )
|