Coverage for cookbook/helper/scrapers/scrapers.py: 93%

27 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2023-12-29 01:02 +0100

1from json import JSONDecodeError 

2 

3from bs4 import BeautifulSoup 

4from recipe_scrapers import SCRAPERS, get_host_name 

5from recipe_scrapers._factory import SchemaScraperFactory 

6from recipe_scrapers._schemaorg import SchemaOrg 

7 

8from .cooksillustrated import CooksIllustrated 

9 

# Project-specific scrapers, all backed by the CooksIllustrated class. Each key
# is whatever CooksIllustrated.host(site=...) returns for that site name
# (presumably the site's host name -- confirm against the scraper class).
CUSTOM_SCRAPERS = {
    CooksIllustrated.host(site=site_name): CooksIllustrated
    for site_name in ("cooksillustrated", "americastestkitchen", "cookscountry")
}

# Merge the custom entries into the SCRAPERS mapping imported from
# recipe_scrapers so that lookups by host can find them.
SCRAPERS.update(CUSTOM_SCRAPERS)

16 

17 

def text_scraper(text, url=None):
    """Return a scraper instance built directly from the supplied markup.

    The scraper base class is looked up in ``SCRAPERS`` using the host name
    derived from ``url``. When ``url`` is falsy, or its host has no entry in
    ``SCRAPERS``, ``SchemaScraperFactory.SchemaScraper`` is used instead.

    Args:
        text: Markup handed to ``BeautifulSoup`` and ``SchemaOrg``.
        url: Optional source URL; used to pick the scraper class and stored
            on the returned instance.

    Returns:
        An instance of a locally defined ``TextScraper`` subclass of the
        selected scraper class.
    """
    host = get_host_name(url) if url else None
    base_cls = (
        SCRAPERS[host]
        if host in SCRAPERS
        else SchemaScraperFactory.SchemaScraper
    )

    class TextScraper(base_cls):
        """Subclass of the selected scraper that is populated from given text."""

        def __init__(self, html=None, url=None):
            # The parent constructor is intentionally not invoked here; the
            # attributes below are set by hand from the provided markup.
            self.wild_mode = False
            self.meta_http_equiv = False
            self.soup = BeautifulSoup(html, "html.parser")
            self.url = url
            self.recipe = None
            try:
                self.schema = SchemaOrg(html)
            except (JSONDecodeError, AttributeError):
                # Best effort: schema parsing failures are ignored.
                # NOTE(review): this constructor assigns no ``schema``
                # attribute in that case -- confirm callers cope with that.
                pass

    return TextScraper(url=url, html=text)