automating blog posts to work with jupyter-lite¤
there is a rare occasion that i write notebooks completely in lite. most often i write in a conventional environment then need to amend the content to work when we are in jupyterlite.
%reload_ext pidgy
%reload_ext pidgy
## what do we need to do to make a post work in lite?
* explicitly defined dependencies.
working on a virtual file system is different than your standard file system.
normally we don't have to define our environment each time,
but independent of a real file system - in the browser - we need to install packages each time.
* patching shit
if we use requests then we should use https://github.com/koenvo/pyodide-http
* dealing with `pidgy` and extensions.
* some packages won't work in lite so we will throw a warning when we know this fo
we can infer this information or provide it explicitly in the metadata
what do we need to do to make a post work in lite?¤
-
explicitly defined dependencies.
working on a virtual file system is different than your standard file system. normally we don't have to define our environment each time, but independent of a real file system - in the browser - we need to install packages each time.
-
patching shit
if we use requests then we should use https://github.com/koenvo/pyodide-http
-
dealing with
pidgy
and extensions. -
some packages won't work in lite so we will throw a warning when we know this fo
we can infer this information or provide it explicitly in the metadata
sometimes i forget imports
sometimes i forget imports
## [`depfinder`](https://github.com/ericdill/depfinder) to find packages in a notebook for python
some of my personal style choices might fail like when i use `__import__`, maybe this is a way to cut dependencies from the list.
depfinder
to find packages in a notebook for python¤
some of my personal style choices might fail like when i use __import__
, maybe this is a way to cut dependencies from the list.
from pathlib import Path
import depfinder, pandas
__import__("requests_cache").install_cache()
from pathlib import Path
import depfinder, pandas
__import__("requests_cache").install_cache()
def get_files(dir="", glob="*.ipynb") -> pandas.Index:
    """Collect the files below ``dir`` whose names match ``glob``.

    The search is recursive (``Path.rglob``). Matches are sorted so the
    resulting index does not depend on the order in which the file system
    happens to list directory entries.

    Args:
        dir: root directory to search; the empty default resolves to the
            current working directory.
        glob: file name pattern handed to ``Path.rglob``.

    Returns:
        A ``pandas.Index`` of ``pathlib.Path`` objects named ``"files"``.
    """
    matches = sorted(Path(dir).rglob(glob))
    return pandas.Index(matches).rename("files")
def get_files(dir="", glob="*.ipynb") -> pandas.Index:
    """Collect the files below ``dir`` whose names match ``glob``.

    The search is recursive (``Path.rglob``). Matches are sorted so the
    resulting index does not depend on the order in which the file system
    happens to list directory entries.

    Args:
        dir: root directory to search; the empty default resolves to the
            current working directory.
        glob: file name pattern handed to ``Path.rglob``.

    Returns:
        A ``pandas.Index`` of ``pathlib.Path`` objects named ``"files"``.
    """
    matches = sorted(Path(dir).rglob(glob))
    return pandas.Index(matches).rename("files")
def get_cells(files: pandas.Index) -> pandas.DataFrame:
    """Load every notebook in ``files`` and return one row per cell.

    Each file is read as UTF-8 JSON, its ``cells`` list is expanded so the
    result is indexed by (file, position of the cell in the notebook), and
    the keys of each cell become columns. The per-notebook ``pidgy`` flag
    computed by ``get_has_pidgy`` is joined on as an extra column.

    Args:
        files: index of notebook paths, e.g. the result of ``get_files``.

    Returns:
        A ``pandas.DataFrame`` of cells with an added ``pidgy`` column.
    """
    # Local import: no module-level ``import json`` is visible in this file.
    import json

    def load(path):
        # Notebook JSON is UTF-8; do not rely on the platform default encoding.
        return json.loads(path.read_text(encoding="utf-8"))

    notebooks = files.to_series().apply(load).apply(pandas.Series)
    df = notebooks.cells.apply(pandas.Series).stack().apply(pandas.Series)
    return df.join(get_has_pidgy(df))
def get_cells(files: pandas.Index) -> pandas.DataFrame:
    """Load every notebook in ``files`` and return one row per cell.

    Each file is read as UTF-8 JSON, its ``cells`` list is expanded so the
    result is indexed by (file, position of the cell in the notebook), and
    the keys of each cell become columns. The per-notebook ``pidgy`` flag
    computed by ``get_has_pidgy`` is joined on as an extra column.

    Args:
        files: index of notebook paths, e.g. the result of ``get_files``.

    Returns:
        A ``pandas.DataFrame`` of cells with an added ``pidgy`` column.
    """
    # Local import: no module-level ``import json`` is visible in this file.
    import json

    def load(path):
        # Notebook JSON is UTF-8; do not rely on the platform default encoding.
        return json.loads(path.read_text(encoding="utf-8"))

    notebooks = files.to_series().apply(load).apply(pandas.Series)
    df = notebooks.cells.apply(pandas.Series).stack().apply(pandas.Series)
    return df.join(get_has_pidgy(df))
can haz pidgy?¤
some of these posts are in pidgy
, i'll use %reload_ext pidgy
when that is the situation.
peek in the cells
to find pidgy
notebooks.
def get_has_pidgy(cells):
    """Flag the notebooks that load the ``pidgy`` extension.

    Args:
        cells: frame of notebook cells with ``cell_type`` and ``source``
            columns, whose first index level identifies the notebook.

    Returns:
        A boolean Series named ``"pidgy"``, indexed by the first index
        level, that is true when any code cell of that notebook contains
        ``%load_ext pidgy`` or ``%reload_ext pidgy``.
    """
    code = cells[cells.cell_type.eq("code")]
    # ``source`` may be a list of lines or a single string; join handles both.
    sources = code.source.apply("".join)
    # ``(?:re)?`` accepts exactly the two magics, unlike a ``[re]*`` class
    # which would also match e.g. ``%eeload_ext``.
    loads_pidgy = sources.str.contains(r"%(?:re)?load_ext pidgy")
    return loads_pidgy.groupby(level=0).any().rename("pidgy")
def get_has_pidgy(cells):
    """Flag the notebooks that load the ``pidgy`` extension.

    Args:
        cells: frame of notebook cells with ``cell_type`` and ``source``
            columns, whose first index level identifies the notebook.

    Returns:
        A boolean Series named ``"pidgy"``, indexed by the first index
        level, that is true when any code cell of that notebook contains
        ``%load_ext pidgy`` or ``%reload_ext pidgy``.
    """
    code = cells[cells.cell_type.eq("code")]
    # ``source`` may be a list of lines or a single string; join handles both.
    sources = code.source.apply("".join)
    # ``(?:re)?`` accepts exactly the two magics, unlike a ``[re]*`` class
    # which would also match e.g. ``%eeload_ext``.
    loads_pidgy = sources.str.contains(r"%(?:re)?load_ext pidgy")
    return loads_pidgy.groupby(level=0).any().rename("pidgy")
# Build the cell table for every ``*.ipynb`` file found recursively under the
# current working directory (the defaults of ``get_files``).
cells = get_cells(get_files())
cells = get_cells(get_files())
### get the imports
def get_import(row: pandas.Series) -> dict:
    """Find the imports of one code cell with ``depfinder``.

    ``get_import`` normalizes the cell source code for analysis by
    ``depfinder``: the source lines are joined, cells of pidgy notebooks are
    first rendered to Python with ``midgy``, and the result is dedented.

    Args:
        row: one row of the cells frame. Reads ``row.source``, ``row.pidgy``
            and ``row.name[0]`` (handed to ``depfinder`` as its second
            argument).

    Returns:
        The attributes (``vars``) of the object returned by
        ``depfinder.inspection.get_imported_libs``, or ``None`` when
        ``depfinder`` raises while analysing the cell.
    """
    # Local import: no module-level ``import textwrap`` is visible in this file.
    import textwrap

    source = "".join(row.source)
    if row.pidgy:
        source = midgy.python.Python().render(source)
    try:
        finder = depfinder.inspection.get_imported_libs(textwrap.dedent(source), row.name[0])
    except Exception:
        # Best effort: a cell that cannot be analysed contributes nothing.
        # ``Exception`` (not ``BaseException``) so Ctrl-C still interrupts.
        return None
    return vars(finder)
get the imports¤
def get_import(row: pandas.Series) -> dict:
    """Find the imports of one code cell with ``depfinder``.

    ``get_import`` normalizes the cell source code for analysis by
    ``depfinder``: the source lines are joined, cells of pidgy notebooks are
    first rendered to Python with ``midgy``, and the result is dedented.

    Args:
        row: one row of the cells frame. Reads ``row.source``, ``row.pidgy``
            and ``row.name[0]`` (handed to ``depfinder`` as its second
            argument).

    Returns:
        The attributes (``vars``) of the object returned by
        ``depfinder.inspection.get_imported_libs``, or ``None`` when
        ``depfinder`` raises while analysing the cell.
    """
    # Local import: no module-level ``import textwrap`` is visible in this file.
    import textwrap

    source = "".join(row.source)
    if row.pidgy:
        source = midgy.python.Python().render(source)
    try:
        finder = depfinder.inspection.get_imported_libs(textwrap.dedent(source), row.name[0])
    except Exception:
        # Best effort: a cell that cannot be analysed contributes nothing.
        # ``Exception`` (not ``BaseException``) so Ctrl-C still interrupts.
        return None
    return vars(finder)
evaluate the sources¤
import depfinder, pandas, midgy
# NOTE(review): presumably installs a global cache for ``requests`` calls — confirm against requests_cache.
__import__("requests_cache").install_cache()
# Ø is true only when this runs as ``__main__`` and ``__file__`` is not among
# the locals; it gates the ``if Ø:`` exploratory cell.
# NOTE(review): presumably this means "interactive notebook session" — confirm.
Ø = __name__ == "__main__" and "__file__" not in locals()
import depfinder, pandas, midgy
__import__("requests_cache").install_cache()
Ø = __name__ == "__main__" and "__file__" not in locals()
def get_modules(cells):
    """Collect the ``*_modules`` results of ``get_import`` for every code cell.

    ``get_import`` is applied to each code cell; cells it could not analyse
    (``None``) are dropped, and each remaining result dict is expanded into
    columns.

    Args:
        cells: frame of notebook cells with a ``cell_type`` column plus the
            columns ``get_import`` reads from each row.

    Returns:
        A ``pandas.DataFrame`` indexed like the analysed code cells that
        holds only the columns whose names end in ``"_modules"``. When no
        cell could be analysed the frame has no columns.
    """
    # Local import: no module-level ``import functools`` is visible in this file.
    import functools

    found = cells[cells.cell_type.eq("code")].apply(get_import, axis=1).dropna()
    if found.empty:
        # Nothing to expand: the ``apply`` below would not produce a frame,
        # so ``.columns`` would fail.
        return pandas.DataFrame(index=found.index)
    results = found.apply(functools.partial(pandas.Series, dtype="O"))
    return results[results.columns[results.columns.str.endswith("_modules")]]
def get_modules(cells):
    """Collect the ``*_modules`` results of ``get_import`` for every code cell.

    ``get_import`` is applied to each code cell; cells it could not analyse
    (``None``) are dropped, and each remaining result dict is expanded into
    columns.

    Args:
        cells: frame of notebook cells with a ``cell_type`` column plus the
            columns ``get_import`` reads from each row.

    Returns:
        A ``pandas.DataFrame`` indexed like the analysed code cells that
        holds only the columns whose names end in ``"_modules"``. When no
        cell could be analysed the frame has no columns.
    """
    # Local import: no module-level ``import functools`` is visible in this file.
    import functools

    found = cells[cells.cell_type.eq("code")].apply(get_import, axis=1).dropna()
    if found.empty:
        # Nothing to expand: the ``apply`` below would not produce a frame,
        # so ``.columns`` would fail.
        return pandas.DataFrame(index=found.index)
    results = found.apply(functools.partial(pandas.Series, dtype="O"))
    return results[results.columns[results.columns.str.endswith("_modules")]]
a snapshot of the modules imported within the content
# Exploratory summary, run only when Ø is truthy.
# NOTE(review): a top-level ``return`` and the bare name ``HTML`` are not
# valid/defined in plain Python as far as this chunk shows — presumably both
# are provided by the pidgy/IPython session; confirm.
if Ø:
# Rebuild the cell table, then attach the ``*_modules`` columns from get_modules.
(cells := get_cells(get_files()))
(cells := cells.join(get_modules(cells)))
# Keep only the ``*_modules`` columns.
modules = cells[cells.columns[cells.columns.str.endswith("_modules")]]
# Flatten each collection of module names into one long Series of names.
modules = modules.stack().apply(list).apply(pandas.Series, dtype="O").stack()
# One-row HTML table counting how often each module name occurs.
return HTML(modules.value_counts().to_frame().T.to_html())
# Exploratory summary, run only when Ø is truthy.
# NOTE(review): a top-level ``return`` and the bare name ``HTML`` are not
# valid/defined in plain Python as far as this chunk shows — presumably both
# are provided by the pidgy/IPython session; confirm.
if Ø:
# Rebuild the cell table, then attach the ``*_modules`` columns from get_modules.
(cells := get_cells(get_files()))
(cells := cells.join(get_modules(cells)))
# Keep only the ``*_modules`` columns.
modules = cells[cells.columns[cells.columns.str.endswith("_modules")]]
# Flatten each collection of module names into one long Series of names.
modules = modules.stack().apply(list).apply(pandas.Series, dtype="O").stack()
# One-row HTML table counting how often each module name occurs.
return HTML(modules.value_counts().to_frame().T.to_html())
todo¤
- inject the imports back into the notebooks. where though?
- find magics