Coverage for cookbook/helper/scrapers/scrapers.py: 93%
27 statements
« prev ^ index » next coverage.py v7.4.0, created at 2023-12-29 01:02 +0100
« prev ^ index » next coverage.py v7.4.0, created at 2023-12-29 01:02 +0100
1from json import JSONDecodeError
3from bs4 import BeautifulSoup
4from recipe_scrapers import SCRAPERS, get_host_name
5from recipe_scrapers._factory import SchemaScraperFactory
6from recipe_scrapers._schemaorg import SchemaOrg
8from .cooksillustrated import CooksIllustrated
# Scrapers shipped alongside this module, keyed by whatever
# CooksIllustrated.host() returns for each supported site name.
CUSTOM_SCRAPERS = {
    CooksIllustrated.host(site=site_name): CooksIllustrated
    for site_name in ("cooksillustrated", "americastestkitchen", "cookscountry")
}
# Merge the custom entries into the SCRAPERS mapping imported from
# recipe_scrapers so lookups by host can find them.
SCRAPERS.update(CUSTOM_SCRAPERS)
def text_scraper(text, url=None):
    """Build a scraper instance for markup that is already in memory.

    The class registered in ``SCRAPERS`` for the host of *url* is used as the
    base class. When *url* is falsy, or its host has no entry in ``SCRAPERS``,
    ``SchemaScraperFactory.SchemaScraper`` is used instead.

    Args:
        text: Markup to parse; passed to ``BeautifulSoup`` and ``SchemaOrg``.
        url: Optional URL associated with the markup; also stored on the
            returned scraper.

    Returns:
        An instance of the locally defined ``TextScraper`` subclass.
    """
    host = get_host_name(url) if url else None
    base_class = SCRAPERS.get(host, SchemaScraperFactory.SchemaScraper)

    class TextScraper(base_class):
        """Scraper whose state is populated directly from supplied markup."""

        def __init__(self, html=None, url=None):
            # The parent initialiser is intentionally not invoked here; the
            # attributes are assigned by hand instead.
            # NOTE(review): presumably this avoids the parent fetching the
            # page itself -- confirm against recipe_scrapers.
            self.wild_mode = False
            self.meta_http_equiv = False
            self.soup = BeautifulSoup(html, "html.parser")
            self.url = url
            self.recipe = None
            try:
                self.schema = SchemaOrg(html)
            except (JSONDecodeError, AttributeError):
                # Best effort: when schema parsing fails, this method leaves
                # the `schema` attribute unassigned.
                pass

    return TextScraper(url=url, html=text)