Coverage for cookbook/helper/ingredient_parser.py: 85%

175 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2023-12-29 01:02 +0100

1import re 

2import string 

3import unicodedata 

4 

5from cookbook.helper.automation_helper import AutomationEngine 

6from cookbook.models import Food, Ingredient, Unit 

7 

8 

class IngredientParser:
    """
    Parser that splits free text ingredient lines (e.g. '1 l Water') into amount, unit, food and note.

    Unless created with ignore_automations=True, the parser routes the text, the unit and the food
    through an AutomationEngine instance.
    """
    request = None
    ignore_rules = False
    automation = None

    def __init__(self, request, cache_mode=True, ignore_automations=False):
        """
        Initialize ingredient parser
        :param request: request context (to control caching, rule ownership, etc.)
        :param cache_mode: defines if all rules should be loaded on initialization (good when parser is used many times) or if they should be retrieved every time (good when parser is not used many times in a row)
        :param ignore_automations: ignore automation rules, allows to use ingredient parser without database access/request (request can be None)
        """
        self.request = request
        self.ignore_rules = ignore_automations
        if not self.ignore_rules:
            self.automation = AutomationEngine(self.request, use_cache=cache_mode)

    def get_unit(self, unit):
        """
        Get or create a unit for given space respecting possible automations
        :param unit: string unit
        :return: None if unit passed is invalid, Unit object otherwise
        """
        if not unit:
            return None
        if self.ignore_rules:
            name = unit.strip()
        else:
            name = self.automation.apply_unit_automation(unit)
        u, _ = Unit.objects.get_or_create(name=name, space=self.request.space)
        return u

    def get_food(self, food):
        """
        Get or create a food for given space respecting possible automations
        :param food: string food
        :return: None if food passed is invalid, Food object otherwise
        """
        if not food:
            return None
        if self.ignore_rules:
            name = food.strip()
        else:
            name = self.automation.apply_food_automation(food)
        f, _ = Food.objects.get_or_create(name=name, space=self.request.space)
        return f

    def parse_fraction(self, x):
        """
        Convert a textual fraction into a number
        :param x: either a single unicode fraction character (e.g. '½') or an ASCII fraction of the form 'a/b'
        :return: value of the fraction as float
        :raises ValueError: if x is not a fraction or the denominator is zero
        """
        if len(x) == 1 and 'fraction' in unicodedata.decomposition(x):
            # use the numeric value from the unicode database instead of decoding the
            # decomposition by hand, that also covers fractions with multi digit parts like '⅒'
            return unicodedata.numeric(x)

        parts = x.split('/')
        if len(parts) != 2:
            raise ValueError(f'{x!r} is not a fraction')
        try:
            return int(parts[0]) / int(parts[1])
        except ZeroDivisionError:
            raise ValueError(f'{x!r} has a zero denominator') from None

    def parse_amount(self, x):
        """
        Split a single token into a leading amount and whatever follows it
        :param x: token to parse, e.g. '2', '1,5', '1/2', '2½' or '500g'
        :return: tuple of amount, unit (None if nothing usable follows the amount), note (the whole token if the trailing part looks like an alternative amount, empty otherwise)
        :raises ValueError: if the token does not start with an amount
        """
        amount = 0
        unit = None
        note = ''
        if x.strip() == '':
            return amount, unit, note

        did_check_frac = False
        end = 0
        # consume digits, a separator ('.', ',' or '/') only counts when a digit follows it
        while (end < len(x) and (x[end] in string.digits
                                 or (
                                     (x[end] == '.' or x[end] == ',' or x[end] == '/')
                                     and end + 1 < len(x)
                                     and x[end + 1] in string.digits
                                 ))):
            end += 1
        if end > 0:
            if '/' in x[:end]:
                amount = self.parse_fraction(x[:end])
            else:
                amount = float(x[:end].replace(',', '.'))
        else:
            # no leading digits -> the first character has to be a fraction character
            amount = self.parse_fraction(x[0])
            end += 1
            did_check_frac = True
        if end < len(x):
            if did_check_frac:
                unit = x[end:]
            else:
                try:
                    # digits directly followed by a fraction character, e.g. '2½'
                    amount += self.parse_fraction(x[end])
                    unit = x[end + 1:]
                except ValueError:
                    unit = x[end:]

        if unit is not None and unit.strip() == '':
            unit = None

        # i dont know any unit that starts with ( or - so its likely an alternative like 1L (500ml) Water or 2-3
        if unit is not None and unit.startswith(('(', '-')):
            unit = None
            note = x
        return amount, unit, note

    def parse_food_with_comma(self, tokens):
        """
        Split tokens into food and note at the first token ending in a comma
        :param tokens: list of string tokens
        :return: tuple of food, note (note is empty if no token ends in a comma)
        """
        food = ''
        note = ''
        start = 0
        # search for first occurrence of an argument ending in a comma
        while start < len(tokens) and not tokens[start].endswith(','):
            start += 1
        if start == len(tokens):
            # no token ending in a comma found -> use everything as food
            food = ' '.join(tokens)
        else:
            # [:-1] drops the comma itself
            food = ' '.join(tokens[:start + 1])[:-1]
            note = ' '.join(tokens[start + 1:])
        return food, note

    def parse_food(self, tokens):
        """
        Split tokens into food and note, treating a trailing bracketed part as the note
        :param tokens: non empty list of string tokens
        :return: tuple of food, note
        :raises ValueError: if the last token closes a bracket that is opened by the first token or by no token at all
        """
        if not tokens[-1].endswith(')'):
            return self.parse_food_with_comma(tokens)

        # Check if the matching opening bracket is in the same token
        if (not tokens[-1].startswith('(')) and ('(' in tokens[-1]):
            return self.parse_food_with_comma(tokens)

        # last argument ends with closing bracket -> look for opening bracket
        start = len(tokens) - 1
        while start > 0 and not tokens[start].startswith('('):
            start -= 1
        if start == 0:
            # either the whole list is wrapped in brackets or no token opens a bracket at all
            # -> assume it is an error (e.g. assumed first argument was the unit)
            # parse() catches this and retries with more tokens
            raise ValueError

        # opening bracket found -> split in food and note, remove brackets from note
        note = ' '.join(tokens[start:])[1:-1]
        food = ' '.join(tokens[:start])
        return food, note

    def parse(self, ingredient):
        """
        Main parsing function, takes an ingredient string (e.g. '1 l Water') and extracts amount, unit, food, ...
        :param ingredient: string ingredient
        :return: amount, unit (can be None), food, note (can be empty)
        :raises ValueError: if the string is empty, consists only of whitespace, is longer than 512 characters or no food could be extracted
        """
        # initialize default values
        amount = 0
        unit = None
        food = ''
        note = ''
        unit_note = ''

        if len(ingredient) == 0:
            raise ValueError('string to parse cannot be empty')

        if len(ingredient) > 512:
            raise ValueError('cannot parse ingredients with more than 512 characters')

        # some people/languages put amount and unit at the end of the ingredient string
        # if something like this is detected move it to the beginning so the parser can handle it
        if re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
            match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
            # cut out only the matched span, the same text may appear again later in the string
            ingredient = match.group(0) + ' ' + ingredient[:match.start()] + ingredient[match.end():]

        # if the string contains parenthesis early on remove it and place it at the end
        # because its likely some kind of note
        if re.match('(.){1,6}\\s\\((.[^\\(\\)])+\\)\\s', ingredient):
            match = re.search('\\((.[^\\(])+\\)', ingredient)
            ingredient = ingredient[:match.start()] + ingredient[match.end():] + ' ' + ingredient[match.start():match.end()]

        # leading spaces before commas result in extra tokens, clean them out
        ingredient = ingredient.replace(' ,', ',')

        # handle "(from) - (to)" amounts by using the minimum amount and adding the range to the description
        # "10.5 - 200 g XYZ" => "10.5 g XYZ (10.5 - 200)"
        ingredient = re.sub("^(\\d+|\\d+[\\.,]\\d+) - (\\d+|\\d+[\\.,]\\d+) (.*)", "\\1 \\3 (\\1 - \\2)", ingredient)

        # if amount and unit are connected add space in between
        # NOTE(review): '[A-z]' also matches the ASCII punctuation between 'Z' and 'a', '[A-Za-z]' was probably intended
        if re.match('([0-9])+([A-z])+\\s', ingredient):
            ingredient = re.sub(r'(?<=([a-z])|\d)(?=(?(1)\d|[a-z]))', ' ', ingredient)

        if not self.ignore_rules:
            ingredient = self.automation.apply_transpose_automation(ingredient)

        tokens = ingredient.split()  # split at each space into tokens
        if not tokens:
            # only whitespace left, report it like every other unparsable input instead of failing on tokens[0]
            raise ValueError(f'Error parsing string {ingredient}, food cannot be empty')
        if len(tokens) == 1:
            # there only is one argument, that must be the food
            food = tokens[0]
        else:
            try:
                # try to parse first argument as amount
                amount, unit, unit_note = self.parse_amount(tokens[0])
                # only try to parse second argument as amount if there are at least
                # three arguments if it already has a unit there can't be
                # a fraction for the amount
                if len(tokens) > 2:
                    if not self.ignore_rules:
                        tokens = self.automation.apply_never_unit_automation(tokens)
                    try:
                        if unit is not None:
                            # a unit is already found, no need to try the second argument for a fraction
                            # raising here reuses the handling in the except branch below
                            raise ValueError
                        # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
                        amount += self.parse_fraction(tokens[1])
                        # assume that units can't end with a comma
                        if len(tokens) > 3 and not tokens[2].endswith(','):
                            # try to use third argument as unit and everything else as food, use everything as food if it fails
                            try:
                                food, note = self.parse_food(tokens[3:])
                                unit = tokens[2]
                            except ValueError:
                                food, note = self.parse_food(tokens[2:])
                        else:
                            food, note = self.parse_food(tokens[2:])
                    except ValueError:
                        # assume that units can't end with a comma
                        if not tokens[1].endswith(','):
                            # try to use second argument as unit and everything else as food, use everything as food if it fails
                            try:
                                food, note = self.parse_food(tokens[2:])
                                if unit is None:
                                    unit = tokens[1]
                                else:
                                    note = tokens[1]
                            except ValueError:
                                food, note = self.parse_food(tokens[1:])
                        else:
                            food, note = self.parse_food(tokens[1:])
                else:
                    # only two arguments, first one is the amount
                    # which means this is the food
                    food = tokens[1]
            except ValueError:
                try:
                    # can't parse first argument as amount
                    # -> no unit -> parse everything as food
                    food, note = self.parse_food(tokens)
                except ValueError:
                    food = ' '.join(tokens[1:])

        if unit_note not in note:
            note += ' ' + unit_note

        if unit and not self.ignore_rules:
            unit = self.automation.apply_unit_automation(unit)

        if food and not self.ignore_rules:
            food = self.automation.apply_food_automation(food)

        food_max_length = Food._meta.get_field('name').max_length
        if len(food) > food_max_length:  # test if food name is to long
            food_words = food.split()
            # try splitting it at a space and taking only the first arg
            if len(food_words) > 1 and len(food_words[0]) < food_max_length:
                note = ' '.join(food_words[1:]) + ' ' + note
                food = food_words[0]
            else:
                # keep the full text in the note and cut the food name to the allowed length
                note = food + ' ' + note
                food = food[:food_max_length]

        if len(food.strip()) == 0:
            raise ValueError(f'Error parsing string {ingredient}, food cannot be empty')

        return amount, unit, food, note[:Ingredient._meta.get_field('note').max_length].strip()