Merge Sayantan Santra (SinTan1729)'s improvements.

2025-04-19 10:00:00 -05:00 · 2023-07-01 13:36:55 +02:00 · 2023-07-01 13:36:55 +02:00 · 87360450ae
commit 87360450ae
parent 5b218043d0
1 changed files with 93 additions and 69 deletions
--- a/recipe_box.py
+++ b/recipe_box.py
@ -7,12 +7,18 @@
    1. https://obsidian.md/
    2. https://www.ourstate.com/a-kitchens-riches/
 """
+import argparse
 import json
-import optparse
 import os
-import requests
 import sys

+try:
+    import httpx
+    url_getter = httpx
+except ImportError:
+    import requests
+    url_getter = requests
+
 from recipe_scrapers import scrape_me, WebsiteNotImplementedError, SCRAPERS


@ -39,7 +45,7 @@ def ensure_directory_exists(path, expand_user=True, file=False):
            # A parallel process created the directory after the existence check.
            pass

-    return(path)
+    return path


 def valid_filename(directory, filename=None, ascii=False):
@ -66,7 +72,7 @@ def valid_filename(directory, filename=None, ascii=False):
    #     filename = filename.decode('ascii', 'ignore')

    # Allow for directories.
-    items = {item: True for item in os.listdir(directory)}
+    items = set(os.listdir(directory))
    if filename in items:
        count = 1
        while test_filename(filename, count) in items:
@ -80,24 +86,83 @@ def valid_filename(directory, filename=None, ascii=False):
        return filename


+def process_recipe(config, scraper, url, verbose=False):
+    """ Process the recipe at a given URL.
+    """
+    recipe_box = ensure_directory_exists(config['recipe_box'])
+    media = ensure_directory_exists(os.path.join(config['recipe_box'], 'media'))
+
+    prefix = scraper.title().lower()
+    path = os.path.join(recipe_box, prefix + '.md')
+    path = valid_filename(path)
+    recipe = open(path, 'w')
+
+    try:
+        image_url = scraper.image()
+        response = url_getter.get(image_url)
+    except:
+        filename = None
+    else:
+        # Not sure about image urls without filename extensions, might need python-magic.
+        # Also, os.path.splitext(url), probably not a good idea. ;)
+        filename = os.path.splitext(os.path.basename(path))[0] + os.path.splitext(scraper.image())[1]
+        filepath = os.path.join(media, filename)
+        image = open(filepath, 'wb')
+        image.write(response.content)
+        image.close()
+        if verbose:
+            print('Saving {url} -> {path}'.format(url=image_url, path=filepath))
+
+    recipe.write('# {title}\n'.format(title=scraper.title()))
+    if filename:
+        recipe.write('![[{filename}]]\n'.format(filename=filename))
+    recipe.write('\n')
+    # This is a placeholder for the user's own notes about the recipe.
+    recipe.write('## Notes\n')
+    recipe.write('\n')
+    recipe.write('## Metadata\n')
+    recipe.write('Yields: {yields}\n'.format(yields=scraper.yields()))
+    recipe.write('Total Time: {total_time}\n'.format(total_time=scraper.total_time()))
+    recipe.write('\n')
+    recipe.write('## Ingredients\n')
+    for ingredient in scraper.ingredients():
+        recipe.write('* {ingredient}\n'.format(ingredient=ingredient))
+
+    recipe.write('\n')
+    recipe.write('## Instructions\n')
+    for instruction in scraper.instructions().split('\n'):
+        instruction = instruction.strip()
+        if instruction:
+            if instruction[0].isdigit():
+                recipe.write('{instruction}\n'.format(instruction=instruction))
+            else:
+                recipe.write('1. {instruction}\n'.format(instruction=instruction))
+
+    recipe.write('\n')
+    recipe.write('[{url}]({url})\n'.format(url=url))
+    recipe.close()
+    # if verbose:
+    print('Saving {url} -> {path}'.format(url=url, path=path))
+
+
 def main():
    """ Console script entry point.
    """
-    parser = optparse.OptionParser('%prog url')
+    parser = argparse.ArgumentParser()
+    parser.add_argument('url', metavar='URL', type=str, nargs='*', default='', help='recipe url')
+    parser.add_argument('-l', dest='list', action='store_true', default=False, help='list all available sites')
+    parser.add_argument('-w', dest='wild_mode', action='store_true', default=False, help="try scraping 'unknown' site using wild-mode (some editing of the recipe might be required)")
+    parser.add_argument('-v', dest='verbose', action='store_true', default=False, help='verbose output')
+    args = parser.parse_args()

-    parser.add_option('-l',
-                      dest='list',
-                      action='store_true',
-                      default=False,
-                      help='list all available sites')
-
-    options, args = parser.parse_args()
-
-    if options.list:
+    if args.list:
        for host in sorted(SCRAPERS):
            print(host)
        sys.exit()

+    wild_mode = args.wild_mode
+    verbose = args.verbose
+
    config_path = ensure_directory_exists(os.path.join(ROOT, 'recipe_box.json'), file=True)
    if not os.path.exists(config_path):
        config = {'recipe_box': '~/recipe_box/'}
@ -107,64 +172,23 @@ def main():
        with open(config_path, 'r') as f:
            config = json.load(f)

-    for url in args:
-        try:
-            scraper = scrape_me(url)
-        except WebsiteNotImplementedError:
-            print('No scraper defined for {url}'.format(url=url))
-            print('It is recommended you add it to recipe-scrapers site, that way everybody gains from the effort.')
-            print('https://github.com/hhursev/recipe-scrapers#if-you-want-a-scraper-for-a-new-site-added')
-            print('')
-            print('Once someone has added the new scraper:')
-            print('pip install --upgrade recipe-scrapers')
-        else:
-            recipe_box = ensure_directory_exists(config['recipe_box'])
-            media = ensure_directory_exists(os.path.join(config['recipe_box'], 'media'))
-
-            prefix = scraper.title().lower()
-            path = os.path.join(recipe_box, prefix + '.md')
-            path = valid_filename(path)
-            recipe = open(path, 'w')
-
+    for url in args.url:
+        if url:
            try:
-                response = requests.get(scraper.image())
-            except:
-                filename = None
+                scraper = scrape_me(url, wild_mode=wild_mode)
+            except WebsiteNotImplementedError:
+                print('No scraper defined for {url}'.format(url=url))
+                print('Try using the -w [wild-mode] option, your mileage may vary.')
+                print('')
+                print('It is recommended you add it to recipe-scrapers site, that way everybody gains from the effort.')
+                print('https://github.com/hhursev/recipe-scrapers#if-you-want-a-scraper-for-a-new-site-added')
+                print('')
+                print('Once someone has added the new scraper:')
+                print('pip install --upgrade recipe-scrapers')
            else:
-                # Not sure about image urls without filename extensions, might need python-magic.
-                # Also, os.path.splitext(url), probably not a good idea. ;)
-                filename = os.path.splitext(os.path.basename(path))[0] + os.path.splitext(scraper.image())[1]
-                image = open(os.path.join(media, filename), 'wb')
-                image.write(response.content)
-                image.close()
-
-            recipe.write('# {title}\n'.format(title=scraper.title()))
-            if filename:
-                recipe.write('![[{filename}]]\n'.format(filename=filename))
-            recipe.write('\n')
-            recipe.write('## Notes\n')
-            recipe.write('\n')
-            recipe.write('## Metadata\n')
-            recipe.write('Yields: {yields}\n'.format(yields=scraper.yields()))
-            recipe.write('Total Time: {total_time}\n'.format(total_time=scraper.total_time()))
-            recipe.write('\n')
-            recipe.write('## Ingredients\n')
-            for ingredient in scraper.ingredients():
-                recipe.write('* {ingredient}\n'.format(ingredient=ingredient))
-
-            recipe.write('\n')
-            recipe.write('## Instructions\n')
-            for instruction in scraper.instructions().split('\n'):
-                instruction = instruction.strip()
-                if instruction:
-                    if instruction[0].isdigit():
-                        recipe.write('{instruction}\n'.format(instruction=instruction))
-                    else:
-                        recipe.write('1. {instruction}\n'.format(instruction=instruction))
-
-            recipe.write('\n')
-            recipe.write('[{url}]({url})\n'.format(url=url))
+                process_recipe(config, scraper, url, verbose)


 if __name__ == '__main__':
    main()
+