This notebook uses doit
to download copies of my gists.
%load_ext doit
from toolz.curried import *; from pandas import DataFrame, concat, Series # Can't wildcard pandas when using doit.
from doit.tools import create_folder; from pathlib import Path; import requests as r
# Look up the GitHub user record; the statements below read its 'gists_url'
# template and its 'public_gists' count.
__user__ = r.get('https://api.github.com/users/tonyfast').json()
# Request the gist listing page by page and stack the pages into one frame
# indexed by gist id.  The page count assumes 30 gists per page and is rounded
# UP (ceiling division): floor division skipped the final, partially filled
# page and requested no pages at all for accounts with fewer than 30 gists.
df = concat([
    DataFrame(r.get(__user__['gists_url'].replace('{/gist_id}', '?page=') + str(page)).json())
    for page in range(1, -(-__user__['public_gists'] // 30) + 1)
]).set_index('id')
# Build one column per metadata field.  For every gist, the chosen field is
# collected from each of its file entries into a Series; stacking the result
# gives one row per (gist, file position), and the two columns are then
# placed side by side.
files = concat(
    [
        df.files.apply(
            lambda gist_files: Series([entry[field] for entry in gist_files.values()])
        ).stack().rename(field)
        for field in ['filename', 'raw_url']
    ],
    axis=1,
)
# Notebook cell output: preview two randomly sampled rows of the gist table.
df.sample(2)
# Notebook cell output: number of gist rows that were collected.
len(df)
def download(url, to):
    """Download a url and write it to file.

    Args:
        url: Address fetched with an HTTP GET.
        to: Destination object exposing ``write_text`` (a ``pathlib.Path``
            where this file calls it).

    Returns:
        True when the response was fetched and written, False when the
        request or the write failed.
    """
    print(to)
    try:
        response = r.get(url)
        # Treat 4xx/5xx replies as failures instead of saving the error page
        # as if it were the gist content.
        response.raise_for_status()
        # Write UTF-8 explicitly; the platform default encoding may be unable
        # to represent the downloaded text.
        to.write_text(response.text, encoding='utf-8')
    except (r.RequestException, OSError) as error:
        # Only network and filesystem failures are reported as False; a bare
        # except would also hide KeyboardInterrupt and programming errors.
        print('download failed:', url, error)
        return False
    return True
def task_store_nb_gists():
    """doit task to download only the notebooks.

    Yields one sub-task per row of ``files`` whose filename ends in
    '.ipynb'.  Each sub-task first creates the destination folder and then
    downloads the row's raw URL into
    ``~/gists/<first index level>/<second index level>/<filename>``.

    Effort can be placed here to make a command line api.
    """
    # pathlib never expands '~' by itself; without expanduser() everything
    # would be written below a literal './~/gists' directory.
    root = Path('~/gists').expanduser()
    for (gist_id, position), row in files.drop_duplicates().iterrows():
        filename = row.loc['filename']
        if Path(filename).suffix != '.ipynb':
            continue
        relative = Path(gist_id) / str(position) / filename
        # Keep the sub-task name in its original, unexpanded form so existing
        # task selectors still match; only the target on disk is expanded.
        label = Path('~/gists') / relative
        target = root / relative
        yield dict(
            name=label, targets=[target], actions=[
                (create_folder, [str(target.parent)]),
                (download, [row.loc['raw_url'], target]),
            ])
%doit store_nb_gists