mirror of
https://github.com/SinTan1729/recipe-box-for-wikijs.git
synced 2024-12-26 10:08:37 -06:00
Add options to display supported sites, improve docs.
This commit is contained in:
parent
71e7934b47
commit
a6e92f03f0
1 changed file with 22 additions and 3 deletions
|
@@ -13,7 +13,7 @@ import os
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from recipe_scrapers import scrape_me, WebsiteNotImplementedError
|
from recipe_scrapers import scrape_me, WebsiteNotImplementedError, SCRAPERS
|
||||||
|
|
||||||
|
|
||||||
ROOT = os.path.dirname(os.path.abspath(__file__))
|
ROOT = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
@@ -83,8 +83,20 @@ def valid_filename(directory, filename=None, ascii=False):
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
parser = optparse.OptionParser('%prog url')
|
parser = optparse.OptionParser('%prog url')
|
||||||
|
|
||||||
|
parser.add_option('-l',
|
||||||
|
dest='list',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='list all available sites')
|
||||||
|
|
||||||
options, args = parser.parse_args()
|
options, args = parser.parse_args()
|
||||||
|
|
||||||
|
if options.list:
|
||||||
|
for host in sorted(SCRAPERS):
|
||||||
|
print(host)
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
config_path = os.path.join(ROOT, 'recipe_box.json')
|
config_path = os.path.join(ROOT, 'recipe_box.json')
|
||||||
if not os.path.exists(config_path):
|
if not os.path.exists(config_path):
|
||||||
config = {'recipe_box': '~/recipe_box/'}
|
config = {'recipe_box': '~/recipe_box/'}
|
||||||
|
@@ -98,7 +110,12 @@ if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
scraper = scrape_me(url)
|
scraper = scrape_me(url)
|
||||||
except WebsiteNotImplementedError:
|
except WebsiteNotImplementedError:
|
||||||
print('Scraper not defined for {url}'.format(url=url))
|
print('No scraper defined for {url}'.format(url=url))
|
||||||
|
print('It is recommended you add it to recipe-scrapers site, that way everybody gains from the effort.')
|
||||||
|
print('https://github.com/hhursev/recipe-scrapers#if-you-want-a-scraper-for-a-new-site-added')
|
||||||
|
print('')
|
||||||
|
print('Once someone has added the new scraper:')
|
||||||
|
print('pip install --upgrade recipe-scrapers')
|
||||||
else:
|
else:
|
||||||
recipe_box = ensure_directory_exists(config['recipe_box'])
|
recipe_box = ensure_directory_exists(config['recipe_box'])
|
||||||
media = ensure_directory_exists(os.path.join(config['recipe_box'], 'media'))
|
media = ensure_directory_exists(os.path.join(config['recipe_box'], 'media'))
|
||||||
|
@@ -113,7 +130,9 @@ if __name__ == '__main__':
|
||||||
except:
|
except:
|
||||||
filename = None
|
filename = None
|
||||||
else:
|
else:
|
||||||
filename = os.path.basename(path)[:-3] + os.path.splitext(scraper.image())[1]
|
# Not sure about image urls without filename extensions, might need python-magic.
|
||||||
|
# Also, os.path.splitext(url), probably not a good idea. ;)
|
||||||
|
filename = os.path.splitext(os.path.basename(path))[0] + os.path.splitext(scraper.image())[1]
|
||||||
image = open(os.path.join(media, filename), 'wb')
|
image = open(os.path.join(media, filename), 'wb')
|
||||||
image.write(response.content)
|
image.write(response.content)
|
||||||
image.close()
|
image.close()
|
||||||
|
|
Loading…
Reference in a new issue