revisit building the mast notebooks¤

combine the toc/execution work from the mast notebooks with site aggregration work.

in the example, we hot patch reprs to make them accessible.

    import tonyfast.utils, pandas, json, nbconvert, inspect, textwrap, nbclient, nbformat, operator, bs4, anyio, pathlib, re, os, traitlets
    __import__("nest_asyncio").apply()

    if MAIN := __name__ == "__main__":
        class Config:
            dir = pathlib.Path("mast_notebooks").absolute()
            paths = ["mast_notebooks"]
            target = pathlib.Path("mast_out").absolute()
            exporter = nbconvert.get_exporter("a11y")(
                exclude_input_prompt=True, 
                include_sa11y=False,
                exclude_output_prompt=True,
                hide_anchor_links=True,
                include_settings=True,
                exclude_anchor_links=True,
                embed_images=True,
                validate_nb=False,
                include_visibility=True
            )
    FILE = globals().get("__file_") or os.environ.get("WRITE")
    async def from_notebook_node(nb, resources=None, exporter=Config.exporter):
        return exporter.from_notebook_node(nb, resources=resources)[0]

load in all the notebooks

find all the notebooks resembling a post. we are skipping the work needing to be done on the indexes and readmes. indexes and readmes use different exporter configurations than content notebooks.

    toc = (
        await pandas.Index([Config.dir / "_toc.yml"], name="path").apath().apath.load()
    ).series()

    config = (
        await pandas.Index([Config.dir / "_config.yml"], name="path").apath().apath.load()
    ).series().T.iloc[:,0]

    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("section").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="section"), append=True)
    )

    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("subsection").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="subsection"), append=True)
    )
    paths = ("mast_notebooks" / files.file.apath())
    print(F"{(~paths.path().path.exists()).sum()} files missing")
    paths = (await paths[await paths.apath().apath.exists()].apath.absolute()).pipe(pandas.Index)

4 files missing

    df = paths.to_series()
    df = df.loc[~df.astype(str).str.contains("checkpoint")].pipe(pandas.Index).rename("file")
    df = pandas.DataFrame(index=df[df.apath.suffix.eq(".ipynb")])

    dependencies = await (await (
        pandas.Index(["mast_notebooks/"]).apath().apath.rglob("requirements.txt")
    )).pipe(pandas.Index).apath.read_text()
    versions = dependencies.apply(str.splitlines).explode().str.extract(
        "^(?P<package>[a-z|A-Z|_|-|0-9]+)\s*(?P<constraint>[\&gt;|\&lt;|=]*)?\s*(?P<version>\S*)?"
    )

    import yaml; from pathlib import Path
    deps = versions.package.dropna().drop_duplicates().tolist()
    deps = [{"git": "GitPython"}.get(x,x) for x in deps ]
    Path("environment.yml").write_text(yaml.safe_dump(dict(
        name="mast_notebooks",
        channels=["conda-forge"],
        dependencies=["python=3.11", "pip", dict(
            pip=deps+ ["ipykernel", "astrocut", "lxml"]
        )]
    )))

%%bash
mamba env create -p.mast_nb -f environment.yml
mamba run -p.mast_nb --live-stream pip install -e../../../nbconvert-a11y
# mamba update -p.mast_nb -f environment.yml --force-reinstall




%%bash
mamba run  -p.mast_nb python -m ipykernel install --user --name mast_nb

    df = (await df.index.apath.read_text()).apply(json.loads)\
    .rename("nb").apply(nbformat.from_dict).pipe(df.join)
    if not FILE:
        df = df.head(10)
    df

	nb
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb	{'cells': [{'cell_type': 'markdown', 'id': 'b0...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb	{'cells': [{'cell_type': 'markdown', 'id': '21...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb	{'cells': [{'cell_type': 'markdown', 'id': 'ba...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...

    import nbclient

the notebooks require preparation before they can transform to html

    MIDGY = re.compile("^\%\%[\s+,(pidgy),(midgy)]")

    def prepare_cell(cell):
        """make inplace changes to the notebook that carried through the publishing process"""
        cell.source = "".join(cell.source)
        if MIDGY.match(cell.source):
            cell.metadata.setdefault("jupyter", {})["source_hidden"] = True
        for out in cell.get("outputs", ""):
            for k, v in out.get("data", {}).items():  k.startswith("text") and out["data"].__setitem__(k, "".join(v))
            if "text" in out: out.text = "".join(out.text)
        return cell

    cells = df.nb.itemgetter("cells").enumerate("index").apply(prepare_cell).series()
    code = cells.loc[cells.cell_type.eq("code"), :]
    _idgy = code[code.source.str.contains("\s*%(?:re)load_ext\s+[pm]idgy")]

    df.loc[_idgy.index.get_level_values(0).drop_duplicates()].apply(
        lambda x: [
            y["metadata"].setdefault("jupyter", {}).setdefault("source_hidden", True)
            for y in x.loc["nb"]["cells"] if y["cell_type"] == "code"
        ] and None, axis=1 
    );

    def define_table_repr():
        %load_ext nbconvert_a11y.outputs
        %load_ext nbconvert_a11y.tables
        import astropy.table
        from nbconvert_a11y.outputs import BeautifulSoup
        from nbconvert_a11y.tables import repr_semantic, get_table, SHOW_INDEX, repr_semantic_update
        @repr_semantic.register(astropy.table.Table)
        def repr_astropy_table(table, *args, **kwargs):
            return get_table(obs.to_pandas(), BeautifulSoup(obs._base_repr_(True)).i.text, type_=type(table), SEMANTIC=False, ROW_INDEX=SHOW_INDEX.hide)

        repr_semantic_update()

    def inject_a11y(nb, LINES = "".join(inspect.getsourcelines(define_table_repr)[0][1:])):
        if nb.cells[1] is not LINES:
            nb.cells.insert(1, nbformat.v4.new_code_cell(LINES, metadata=dict(jupyter=dict(source_hidden=True))))
        return nb

    df.nb = df.nb.apply(nbformat.from_dict).apply(inject_a11y)

    client = df.nb.apply(
        nbclient.NotebookClient, kernel_name="mast_nb", allow_errors=True
    )

    df.nb = (
        await client.head(3).apply(nbclient.NotebookClient.async_execute).gather()
    ).combine_first(df.nb)

    def render_markdown_output(output):
        if "data" in output:
            if "text/markdown" in output["data"]:
                md = Config.exporter.environment.globals["markdown"](output["data"]["text/markdown"])
                output["data"]["text/html"] = md
                return md

    outputs = cells.outputs.dropna().enumerate("output").dropna()
    outputs.apply(render_markdown_output);
    markdowns = cells[cells.cell_type.eq("markdown")].apply(
        lambda s: operator.setitem(
            s.metadata.setdefault("data", {}),
            "text/html",
            html := Config.exporter.environment.filters["markdown2html"](dict(cell=s), s.source),
        )
        or html,
        axis=1,
    ).to_frame("html").assign(output=-1).set_index("output", append=True)

screate intermediate representations of markdown. when we handle this work before templating we can use partial information from the outcome to build the table of contents and relative links.

    html = pandas.concat(
        [
            markdowns,
            outputs.itemgetter("data").dropna().itemgetter("text/html").dropna().to_frame("html"),

        ]
    ).sort_index()

    html["soup"] = html.html.apply(bs4.BeautifulSoup, features="lxml")

extract the headings from each cell

    html["h"] = html.soup.methodcaller("select", "h1,h2,h3,h4,h5,h6")
    h = html.h.enumerate("h").dropna()

expand the headings into features on the dataframe

    h = h.to_frame("h").assign(
        level=h.attrgetter("name").str.lstrip("h").astype(int),
        string=h.attrgetter("text").str.rstrip("¶"),
        id=h.attrgetter("attrs").itemgetter("id")
    ); h.head()

				h	level	string	id
file	index	output	h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb	0	-1	0	[Hubble Catalog of Variables Notebook (API ver...	1	Hubble Catalog of Variables Notebook (API vers...	None
	0	-1	1	[2019 - 2022, Rick White, Steve Lubow, Trenton...	3	2019 - 2022, Rick White, Steve Lubow, Trenton ...	None
	1	-1	0	[Instructions]	1	Instructions	None
	1	-1	1	[Table of Contents]	1	Table of Contents	None
	2	-1	0	[Initialization , []]	1	Initialization	None

extract the document title from the headings. _we should probably extract a description too. adding description to the meta is good for accessibility when choosing tabs.

				h	level	string	id
file	index	output	h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb	0	-1	0	[Hubble Catalog of Variables Notebook (API ver...	1	Hubble Catalog of Variables Notebook (API vers...	None
	0	-1	1	[2019 - 2022, Rick White, Steve Lubow, Trenton...	3	2019 - 2022, Rick White, Steve Lubow, Trenton ...	None
	1	-1	0	[Instructions]	1	Instructions	None
	1	-1	1	[Table of Contents]	1	Table of Contents	None
	2	-1	0	[Initialization , []]	1	Initialization	None
...	...	...	...	...	...	...	...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb	21	-1	0	[Case 2: Wildcard Search with , [instrument_na...	3	Case 2: Wildcard Search with instrument_name a...	None
	24	-1	0	[Case 3: Create a Moving Target Ephemeris usin...	3	Case 3: Create a Moving Target Ephemeris using...	None
	38	-1	0	[Resources]	2	Resources	None
	40	-1	0	[Citations]	2	Citations	None
	42	-1	0	[About This Notebook]	2	About This Notebook	None

155 rows × 4 columns

    df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ))

	nb	title
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Generating Cubes and Cutouts from TESS FFIs
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Beginner: Searching MAST using astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Beginner: Zcut and Astroquery Tutorial
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb	{'cells': [{'cell_type': 'markdown', 'id': 'b0...	Large Downloads in astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb	{'cells': [{'cell_type': 'markdown', 'id': '21...	Historical Quasar Observations
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb	{'cells': [{'cell_type': 'markdown', 'id': 'ba...	Wildcard Handling with Astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Hubble Catalog of Variables Notebook (API vers...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Hubble Source Catalog API Notebook
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	Hubble Source Catalog API Notebook: SMC Color-...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb	{'cells': [{'cell_type': 'markdown', 'metadata...	MAST Table Access Protocol Hubble Source Catal...

    df = df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ).rename("title"))

    df = df.assign(description=html.soup.methodcaller("select_one", "p").dropna().attrgetter("text").groupby(
        "file"
    ).apply(lambda x: x.sort_index().iloc[0]).rename("description").reindex(df.index))

    df.apply(
        lambda x: (
            x.title and x.loc["nb"].metadata.setdefault("title", x.title),
            x.description and x.loc["nb"].metadata.setdefault("description", x.description)
        ), axis=1
    );

make a table of contents details > nav > ol for a dataframe

    def make_toc(df):        
        toc = bs4.BeautifulSoup(features="lxml")
        toc.append(nav := toc.new_tag("nav"))
        nav.append(ol := toc.new_tag("ol"))
        last_level = 1
        for i, row in df.iterrows():
            if row.string:
                if row.level &gt; last_level:
                    for i in range(last_level, row.level):
                        last_level = i + 1
                        ol.append(li := toc.new_tag("li"))
                        li.append(ol := toc.new_tag("ol"))
                else:
                    for i in range(row.level, last_level):
                        if i == 1:
                            continue
                        if ol.parent and ol.parent.parent:
                            ol = ol.parent.parent
                ol.append(li := toc.new_tag("li"))
                li.append( a:= toc.new_tag("a"))
                a.append(row.string)
                a.attrs.update(href=F"#{row.id}")
        return toc

generate the table of contents for each file we have indexed

    df = df.assign(toc=h.groupby(h.index.get_level_values("file")).apply(make_toc).apply(str))

determine the location of the html version of the file.

    df = df.assign(target=(await (
        Config.target / df.index.apath.relative_to(Config.dir)
    ).apath().apath.with_suffix(".html").apath.absolute()).values)

    df = df.assign(target=(await (
        Config.target / df.index.apath.relative_to(Config.dir)
    ).apath().apath.with_suffix(".html").apath.absolute()).values)

    df = df.assign(**pandas.DataFrame([
            [None] + df.index.values[:-1].tolist(), df.index.values, df.index.values[1:].tolist() + [None]
    ], index=["prev", "file", "next"]).T.set_index("file"))

    def relative_path(source, target):
        """compute a relative path from source to target"""
        if target:
            common = []
            if not source.is_absolute():
                source = pathlib.Path(source).absolute()
            if not target.is_absolute():
                target = pathlib.Path(target).absolute()
            for common, (s, t) in enumerate(zip(source.parts, target.parts)):
                if s != t: break
            return type(source)(*[".."]*(len(target.parents)-common), *target.parts[common:])

generate the footer that contains the previous and next links

    df = df.assign(
        footer = df.apply(
                lambda s: (s.prev and F"""<a href="{relative_path(s.target, df.loc[s.prev].target)}" rel="prev&gt;&lt;span aria-hidden=" true"="">&lt;{df.loc[s.prev].title}</a><br/>""" or "")
                + (s.next and F"""<a href="{relative_path(s.target, df.loc[s.next].target)}" rel="next">{df.loc[s.next].title} <span aria-hidden="true">&gt;</span></a><br/>""" or ""),
                axis=1
        )
    )

    me = """<p>mast notebooks</p>"""

    df = df.assign(
        header = df.apply(
        lambda s: me + "<details><summary>site navigation</summary><nav><ol>%s</ol></nav></details>"% "".join(
            F"""<li><a href="{relative_path(s.target, t.target)}">{t.title}</a></li>"""
            for i, t in df.iterrows()
        ), axis=1
    ))

    await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);

    df["html"] = await df[["nb"]].apply(
        lambda s: from_notebook_node(s["nb"], dict(toc=df.toc.loc[s.name], footer=df.loc[s.name].footer, header=df.loc[s.name].header)), axis=1).gather()

create an environment.yml file from the verions information previously collected

example outputs with accessible reprs¤

    if 0 or FILE:
        await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);
        await df.apply(
            lambda s: print(F"""writing {s.target.as_uri()}""") or s.target.write_text(str(s.loc["html"])), axis=1
        ).gather()
    else:
        df.html.head(2).display.iframe().display()