Coverage for cookbook/helper/ingredient_parser.py: 85%
175 statements
« prev ^ index » next coverage.py v7.4.0, created at 2023-12-29 01:02 +0100
« prev ^ index » next coverage.py v7.4.0, created at 2023-12-29 01:02 +0100
1import re
2import string
3import unicodedata
5from cookbook.helper.automation_helper import AutomationEngine
6from cookbook.models import Food, Ingredient, Unit
class IngredientParser:
    """
    Parser that splits a free-text ingredient line (e.g. '1 l Water') into
    amount, unit, food and note, optionally applying automation rules.
    """
    request = None
    ignore_rules = False
    automation = None

    def __init__(self, request, cache_mode=True, ignore_automations=False):
        """
        Initialize ingredient parser
        :param request: request context (to control caching, rule ownership, etc.)
        :param cache_mode: defines if all rules should be loaded on initialization (good when parser is used many times) or if they should be retrieved every time (good when parser is not used many times in a row)
        :param ignore_automations: ignore automation rules, allows to use ingredient parser without database access/request (request can be None)
        """
        self.request = request
        self.ignore_rules = ignore_automations
        if not self.ignore_rules:
            self.automation = AutomationEngine(self.request, use_cache=cache_mode)

    def get_unit(self, unit):
        """
        Get or create a unit for given space respecting possible automations
        :param unit: string unit
        :return: None if unit passed is empty/None, Unit object otherwise
        """
        if not unit:
            return None
        if self.ignore_rules:
            name = unit.strip()
        else:
            name = self.automation.apply_unit_automation(unit)
        u, _created = Unit.objects.get_or_create(name=name, space=self.request.space)
        return u

    def get_food(self, food):
        """
        Get or create a food for given space respecting possible automations
        :param food: string food
        :return: None if food passed is empty/None, Food object otherwise
        """
        if not food:
            return None
        if self.ignore_rules:
            name = food.strip()
        else:
            name = self.automation.apply_food_automation(food)
        f, _created = Food.objects.get_or_create(name=name, space=self.request.space)
        return f

    def parse_fraction(self, x):
        """
        Convert a fraction to a float
        :param x: either a single unicode vulgar fraction character (e.g. '½') or a string of the form 'a/b' with integer a and b
        :return: value of the fraction as float
        :raises ValueError: if x is not a valid fraction or the denominator is zero
        """
        if len(x) == 1 and 'fraction' in unicodedata.decomposition(x):
            # decomposition looks like '<fraction> 0031 2044 0031 0030' (here 1/10):
            # hex code points of the numerator digits, U+2044 FRACTION SLASH, denominator digits.
            # Rebuild the text so multi digit parts are kept and a missing
            # denominator (U+215F) becomes a ValueError instead of an IndexError.
            code_points = unicodedata.decomposition(x).split()[1:]
            decomposed = ''.join(chr(int(code_point, 16)) for code_point in code_points)
            frac_split = decomposed.split('\u2044')
        else:
            frac_split = x.split('/')
        if len(frac_split) != 2:
            raise ValueError
        try:
            # int() raises ValueError for empty or non numeric parts
            return int(frac_split[0]) / int(frac_split[1])
        except ZeroDivisionError:
            raise ValueError

    def parse_amount(self, x):
        """
        Parse a single token that starts with an amount and may have a unit attached (e.g. '1.5kg', '2½', '1/2')
        :param x: string token
        :return: tuple of amount, unit (None if the token has no unit) and note (the whole token if the part after the amount looks like an alternative/range, otherwise empty)
        :raises ValueError: if the token does not start with a number or fraction
        """
        amount = 0
        unit = None
        note = ''
        if x.strip() == '':
            return amount, unit, note

        # consume leading digits; '.', ',' and '/' only count if a digit follows
        end = 0
        while end < len(x) and (
            x[end] in string.digits
            or (x[end] in '.,/' and end + 1 < len(x) and x[end + 1] in string.digits)
        ):
            end += 1

        if end > 0:
            number = x[:end]
            if '/' in number:
                amount = self.parse_fraction(number)
            else:
                amount = float(number.replace(',', '.'))
            if end < len(x):
                # the number may be directly followed by a unicode fraction (e.g. '2½')
                try:
                    amount += self.parse_fraction(x[end])
                    unit = x[end + 1:]
                except ValueError:
                    unit = x[end:]
        else:
            # no leading digit -> the first character has to be a unicode fraction
            amount = self.parse_fraction(x[0])
            if len(x) > 1:
                unit = x[1:]

        if unit is not None and unit.strip() == '':
            unit = None

        # no known unit starts with ( or - so it is likely an alternative like 1L (500ml) Water or 2-3
        if unit is not None and unit.startswith(('(', '-')):
            unit = None
            note = x
        return amount, unit, note

    def parse_food_with_comma(self, tokens):
        """
        Split tokens into food and note at the first token that ends with a comma
        :param tokens: list of string tokens
        :return: tuple of food and note, note is empty if no token ends with a comma
        """
        for index, token in enumerate(tokens):
            if token.endswith(','):
                # everything up to the comma (comma removed) is the food, the rest is the note
                food = ' '.join(tokens[:index + 1])[:-1]
                note = ' '.join(tokens[index + 1:])
                return food, note
        # no token ending in a comma found -> use everything as food
        return ' '.join(tokens), ''

    def parse_food(self, tokens):
        """
        Split tokens into food and note, treating a trailing bracketed group as the note
        :param tokens: non empty list of string tokens
        :return: tuple of food and note
        :raises ValueError: if the whole token list is wrapped in brackets
        """
        last = tokens[-1]
        if not last.endswith(')'):
            return self.parse_food_with_comma(tokens)

        # matching opening bracket is inside the last token (e.g. 'Karotte(n)') -> not a note
        if not last.startswith('(') and '(' in last:
            return self.parse_food_with_comma(tokens)

        # last argument ends with closing bracket -> look for opening bracket
        start = len(tokens) - 1
        while start >= 0 and not tokens[start].startswith('('):
            start -= 1

        if start == 0:
            # the whole list is wrapped in brackets -> assume it is an error (e.g. assumed first argument was the unit)
            raise ValueError
        if start < 0:
            # no opening bracket anywhere -> just ignore the last bracket
            return self.parse_food_with_comma(tokens)

        # opening bracket found -> split in food and note, remove brackets from note
        note = ' '.join(tokens[start:])[1:-1]
        food = ' '.join(tokens[:start])
        return food, note

    def parse(self, ingredient):
        """
        Main parsing function, takes an ingredient string (e.g. '1 l Water') and extracts amount, unit, food, ...
        :param ingredient: string ingredient
        :return: amount, unit (can be None), food, note (can be empty)
        :raises ValueError: if the string is empty, longer than 512 characters or no food could be extracted
        """
        # initialize default values
        amount = 0
        unit = None
        food = ''
        note = ''
        unit_note = ''

        if len(ingredient) == 0:
            raise ValueError('string to parse cannot be empty')

        if len(ingredient) > 512:
            raise ValueError('cannot parse ingredients with more than 512 characters')

        # some people/languages put amount and unit at the end of the ingredient string
        # if something like this is detected move it to the beginning so the parser can handle it
        if re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
            match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
            amount_and_unit = ingredient[match.start():match.end()]
            reordered = amount_and_unit + ' ' + ingredient.replace(amount_and_unit, '')
            # NOTE(review): debug output kept to preserve existing behavior, consider switching to logging
            print(f'reording from {ingredient} to {reordered}')
            ingredient = reordered

        # if the string contains parenthesis early on remove it and place it at the end
        # because its likely some kind of note
        if re.match(r'(.){1,6}\s\((.[^\(\)])+\)\s', ingredient):
            match = re.search(r'\((.[^\(])+\)', ingredient)
            ingredient = ingredient[:match.start()] + ingredient[match.end():] + ' ' + ingredient[match.start():match.end()]

        # leading spaces before commas result in extra tokens, clean them out
        ingredient = ingredient.replace(' ,', ',')

        # handle "(from) - (to)" amounts by using the minimum amount and adding the range to the description
        # "10.5 - 200 g XYZ" => "10.5 g XYZ (10.5 - 200)"
        ingredient = re.sub(r"^(\d+|\d+[\.,]\d+) - (\d+|\d+[\.,]\d+) (.*)", r"\1 \3 (\1 - \2)", ingredient)

        # if amount and unit are connected add space in between
        # NOTE(review): [A-z] also matches the ASCII characters between 'Z' and 'a' ([ \ ] ^ _ `), [A-Za-z] was probably intended
        if re.match(r'([0-9])+([A-z])+\s', ingredient):
            ingredient = re.sub(r'(?<=([a-z])|\d)(?=(?(1)\d|[a-z]))', ' ', ingredient)

        if not self.ignore_rules:
            ingredient = self.automation.apply_transpose_automation(ingredient)

        tokens = ingredient.split()  # split at each space into tokens
        if not tokens:
            # whitespace only input would otherwise fail with an IndexError below
            raise ValueError('string to parse cannot be empty')

        if len(tokens) == 1:
            # there only is one argument, that must be the food
            food = tokens[0]
        else:
            try:
                # try to parse first argument as amount
                amount, unit, unit_note = self.parse_amount(tokens[0])
                # only try to parse second argument as amount if there are at least
                # three arguments if it already has a unit there can't be
                # a fraction for the amount
                if len(tokens) > 2:
                    if not self.ignore_rules:
                        tokens = self.automation.apply_never_unit_automation(tokens)
                    try:
                        if unit is not None:
                            # a unit is already found, no need to try the second argument for a fraction
                            # raising jumps straight to the "second argument is the unit" handling below
                            raise ValueError
                        # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
                        amount += self.parse_fraction(tokens[1])
                        # assume that units can't end with a comma
                        if len(tokens) > 3 and not tokens[2].endswith(','):
                            # try to use third argument as unit and everything else as food, use everything as food if it fails
                            try:
                                food, note = self.parse_food(tokens[3:])
                                unit = tokens[2]
                            except ValueError:
                                food, note = self.parse_food(tokens[2:])
                        else:
                            food, note = self.parse_food(tokens[2:])
                    except ValueError:
                        # assume that units can't end with a comma
                        if not tokens[1].endswith(','):
                            # try to use second argument as unit and everything else as food, use everything as food if it fails
                            try:
                                food, note = self.parse_food(tokens[2:])
                                if unit is None:
                                    unit = tokens[1]
                                else:
                                    note = tokens[1]
                            except ValueError:
                                food, note = self.parse_food(tokens[1:])
                        else:
                            food, note = self.parse_food(tokens[1:])
                else:
                    # only two arguments, first one is the amount
                    # which means this is the food
                    food = tokens[1]
            except ValueError:
                try:
                    # can't parse first argument as amount
                    # -> no unit -> parse everything as food
                    food, note = self.parse_food(tokens)
                except ValueError:
                    food = ' '.join(tokens[1:])

        if unit_note not in note:
            note += ' ' + unit_note

        if unit and not self.ignore_rules:
            unit = self.automation.apply_unit_automation(unit)

        if food and not self.ignore_rules:
            food = self.automation.apply_food_automation(food)

        max_food_length = Food._meta.get_field('name').max_length
        if len(food) > max_food_length:  # test if food name is too long
            # try splitting it at a space and taking only the first arg
            words = food.split()
            if len(words) > 1 and len(words[0]) < max_food_length:
                note = ' '.join(words[1:]) + ' ' + note
                food = words[0]
            else:
                note = food + ' ' + note
                food = food[:max_food_length]

        if len(food.strip()) == 0:
            raise ValueError(f'Error parsing string {ingredient}, food cannot be empty')

        return amount, unit, food, note[:Ingredient._meta.get_field('note').max_length].strip()