
@tonyfast's notebooks

notebook summary
title
revisit building the mast notebooks
description
combine the toc/execution work from the mast notebooks with site aggregation work.
cells
54 total
39 code
state
executed in order
kernel
Python [conda env:p311] *
language
python
name
conda-env-p311-py
lines of code
247
outputs
8
cell ordering
1

revisit building the mast notebooks

combine the toc/execution work from the mast notebooks with site aggregation work.

in the example, we hot patch reprs to make them accessible.
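
a minimal sketch of the idea (not the notebook's actual mechanism): a singledispatch registry maps types to accessible html renderers, the same register pattern repr_semantic uses later in this notebook.

    import functools, pandas

    @functools.singledispatch
    def accessible_repr(obj):
        # fall back to the plain text repr wrapped in a pre element
        return F"<pre>{obj!r}</pre>"

    @accessible_repr.register(pandas.DataFrame)
    def _(df):
        # pandas already emits th header cells that screen readers can announce
        return df.to_html(border=0)

    accessible_repr(pandas.DataFrame({"a": [1, 2]}))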

2
    import tonyfast.utils, pandas, json, nbconvert, inspect, textwrap, nbclient, nbformat, operator, bs4, anyio, pathlib, re, os, traitlets
    __import__("nest_asyncio").apply()
3
    if MAIN := __name__ == "__main__":
        class Config:
            dir = pathlib.Path("mast_notebooks").absolute()
            paths = ["mast_notebooks"]
            target = pathlib.Path("mast_out").absolute()
            exporter = nbconvert.get_exporter("a11y")(
                exclude_input_prompt=True, 
                include_sa11y=False,
                exclude_output_prompt=True,
                hide_anchor_links=True,
                include_settings=True,
                exclude_anchor_links=True,
                embed_images=True,
                validate_nb=False,
                include_visibility=True
            )
    FILE = globals().get("__file_") or os.environ.get("WRITE")
    async def from_notebook_node(nb, resources=None, exporter=Config.exporter):
        return exporter.from_notebook_node(nb, resources=resources)[0]
4

load in all the notebooks

find all the notebooks resembling a post. we skip the indexes and readmes for now; they use different exporter configurations than content notebooks.
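
the flattening below assumes the standard jupyter book table of contents layout: parts hold chapters, chapters may hold sections, and every entry names a file. a sketch of the shape (captions and paths are illustrative):

    toc_shape = {
        "format": "jb-book",
        "root": "index",
        "parts": [
            {"caption": "astroquery", "chapters": [
                {"file": "notebooks/astroquery/beginner_search/beginner_search"},
                {"file": "notebooks/astroquery/large_downloads/large_downloads", "sections": [
                    {"file": "notebooks/astroquery/wildcard_searches/wildcard_searches"},
                ]},
            ]},
        ],
    }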

5
    toc = (
        await pandas.Index([Config.dir / "_toc.yml"], name="path").apath().apath.load()
    ).series()

    config = (
        await pandas.Index([Config.dir / "_config.yml"], name="path").apath().apath.load()
    ).series().T.iloc[:,0]
6
    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("section").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="section"), append=True)
    )
7
    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("subsection").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="subsection"), append=True)
    )
    paths = ("mast_notebooks" / files.file.apath())
    print(F"{(~paths.path().path.exists()).sum()} files missing")
    paths = (await paths[await paths.apath().apath.exists()].apath.absolute()).pipe(pandas.Index)
1 output.
4 files missing

8
    df = paths.to_series()
    df = df.loc[~df.astype(str).str.contains("checkpoint")].pipe(pandas.Index).rename("file")
    df = pandas.DataFrame(index=df[df.apath.suffix.eq(".ipynb")])
9
    dependencies = await (await (
        pandas.Index(["mast_notebooks/"]).apath().apath.rglob("requirements.txt")
    )).pipe(pandas.Index).apath.read_text()
    versions = dependencies.apply(str.splitlines).explode().str.extract(
        "^(?P<package>[a-z|A-Z|_|-|0-9]+)\s*(?P<constraint>[\>|\<|=]*)?\s*(?P<version>\S*)?"
    )
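
each requirements line splits into a package name, an optional constraint, and an optional version. a worked example of the pattern above on a single, illustrative line:

    m = re.match(
        r"^(?P<package>[A-Za-z0-9_-]+)\s*(?P<constraint>[><=]*)?\s*(?P<version>\S*)?",
        "astroquery>=0.4.6",
    )
    assert m.groupdict() == {"package": "astroquery", "constraint": ">=", "version": "0.4.6"}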
10
    
    import yaml; from pathlib import Path
    deps = versions.package.dropna().drop_duplicates().tolist()
    deps = [{"git": "GitPython"}.get(x,x) for x in deps ]
    Path("environment.yml").write_text(yaml.safe_dump(dict(
        name="mast_notebooks",
        channels=["conda-forge"],
        dependencies=["python=3.11", "pip", dict(
            pip=deps+ ["ipykernel", "astrocut", "lxml"]
        )]
    )))
1 output.
428
11
%%bash
mamba env create -p .mast_nb -f environment.yml
mamba run -p .mast_nb --live-stream pip install -e ../../../nbconvert-a11y
# mamba update -p.mast_nb -f environment.yml --force-reinstall
12
%%bash
mamba run -p .mast_nb python -m ipykernel install --user --name mast_nb
13
    df = (await df.index.apath.read_text()).apply(json.loads)\
    .rename("nb").apply(nbformat.from_dict).pipe(df.join)
    if not FILE:
        df = df.head(10)
    df
1 output.
nb
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'b0...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb {'cells': [{'cell_type': 'markdown', 'id': '21...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'ba...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
14
    import nbclient
15

the notebooks require preparation before they can be transformed to html

16
    MIDGY = re.compile(r"^%%\s*[pm]idgy")

    def prepare_cell(cell):
        """make inplace changes to the notebook that carry through the publishing process"""
        cell.source = "".join(cell.source)
        if MIDGY.match(cell.source):
            cell.metadata.setdefault("jupyter", {})["source_hidden"] = True
        for out in cell.get("outputs", ""):
            for k, v in out.get("data", {}).items():
                if k.startswith("text"):
                    out["data"][k] = "".join(v)
            if "text" in out:
                out.text = "".join(out.text)
        return cell
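
a minimal sketch of what prepare_cell normalizes, using a hypothetical cell dict: list sources and outputs become single strings, and pidgy/midgy cells get their input hidden.

    cell = nbformat.from_dict({
        "cell_type": "code", "metadata": {}, "execution_count": None,
        "source": ["%%pidgy\n", "some prose\n"],
        "outputs": [{"output_type": "stream", "name": "stdout", "text": ["a\n", "b\n"]}],
    })
    prepare_cell(cell)
    assert cell.source == "%%pidgy\nsome prose\n"
    assert cell.outputs[0].text == "a\nb\n"
    assert cell.metadata["jupyter"]["source_hidden"]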
17
    cells = df.nb.itemgetter("cells").enumerate("index").apply(prepare_cell).series()
    code = cells.loc[cells.cell_type.eq("code"), :]
    _idgy = code[code.source.str.contains(r"\s*%(?:re)?load_ext\s+[pm]idgy")]
18
    df.loc[_idgy.index.get_level_values(0).drop_duplicates()].apply(
        lambda x: [
            y["metadata"].setdefault("jupyter", {}).setdefault("source_hidden", True)
            for y in x.loc["nb"]["cells"] if y["cell_type"] == "code"
        ] and None, axis=1 
    );
19
    def define_table_repr():
        %load_ext nbconvert_a11y.outputs
        %load_ext nbconvert_a11y.tables
        import astropy.table
        from nbconvert_a11y.outputs import BeautifulSoup
        from nbconvert_a11y.tables import repr_semantic, get_table, SHOW_INDEX, repr_semantic_update
        @repr_semantic.register(astropy.table.Table)
        def repr_astropy_table(table, *args, **kwargs):
            return get_table(table.to_pandas(), BeautifulSoup(table._base_repr_(True)).i.text, type_=type(table), SEMANTIC=False, ROW_INDEX=SHOW_INDEX.hide)
        
        repr_semantic_update()
20
    def inject_a11y(nb, LINES = "".join(inspect.getsourcelines(define_table_repr)[0][1:])):
        if nb.cells[1].get("source") != LINES:
            nb.cells.insert(1, nbformat.v4.new_code_cell(LINES, metadata=dict(jupyter=dict(source_hidden=True))))
        return nb
21
    df.nb = df.nb.apply(nbformat.from_dict).apply(inject_a11y)
22
    client = df.nb.apply(
        nbclient.NotebookClient, kernel_name="mast_nb", allow_errors=True
    )

    df.nb = (
        await client.head(3).apply(nbclient.NotebookClient.async_execute).gather()
    ).combine_first(df.nb)
23
    def render_markdown_output(output):
        if "data" in output:
            if "text/markdown" in output["data"]:
                md = Config.exporter.environment.globals["markdown"](output["data"]["text/markdown"])
                output["data"]["text/html"] = md
                return md
24
    outputs = cells.outputs.dropna().enumerate("output").dropna()
    outputs.apply(render_markdown_output);
    markdowns = cells[cells.cell_type.eq("markdown")].apply(
        lambda s: operator.setitem(
            s.metadata.setdefault("data", {}),
            "text/html",
            html := Config.exporter.environment.filters["markdown2html"](dict(cell=s), s.source),
        )
        or html,
        axis=1,
    ).to_frame("html").assign(output=-1).set_index("output", append=True)
25

create intermediate representations of the markdown. handling this work before templating lets us use partial information from the outcome to build the table of contents and relative links.

26
    html = pandas.concat(
        [
            markdowns,
            outputs.itemgetter("data").dropna().itemgetter("text/html").dropna().to_frame("html"),
            
        ]
    ).sort_index()

    html["soup"] = html.html.apply(bs4.BeautifulSoup, features="lxml")
27

extract the headings from each cell

28
    html["h"] = html.soup.methodcaller("select", "h1,h2,h3,h4,h5,h6")
    h = html.h.enumerate("h").dropna()
29

expand the headings into features on the dataframe

30
    h = h.to_frame("h").assign(
        level=h.attrgetter("name").str.lstrip("h").astype(int),
        string=h.attrgetter("text").str.rstrip("¶"),
        id=h.attrgetter("attrs").itemgetter("id")
    ); h.head()
1 output.
h level string id
file index output h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb 0 -1 0 [Hubble Catalog of Variables Notebook (API ver... 1 Hubble Catalog of Variables Notebook (API vers... None
1 [2019 - 2022, Rick White, Steve Lubow, Trenton... 3 2019 - 2022, Rick White, Steve Lubow, Trenton ... None
1 -1 0 [Instructions] 1 Instructions None
1 [Table of Contents] 1 Table of Contents None
2 -1 0 [Initialization , []] 1 Initialization None
31

extract the document title from the headings. we should probably extract a description too; adding a description to the meta is good for accessibility when choosing tabs.

32
    h
1 output.
h level string id
file index output h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb 0 -1 0 [Hubble Catalog of Variables Notebook (API ver... 1 Hubble Catalog of Variables Notebook (API vers... None
1 [2019 - 2022, Rick White, Steve Lubow, Trenton... 3 2019 - 2022, Rick White, Steve Lubow, Trenton ... None
1 -1 0 [Instructions] 1 Instructions None
1 [Table of Contents] 1 Table of Contents None
2 -1 0 [Initialization , []] 1 Initialization None
... ... ... ... ... ... ... ...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb 21 -1 0 [Case 2: Wildcard Search with , [instrument_na... 3 Case 2: Wildcard Search with instrument_name a... None
24 -1 0 [Case 3: Create a Moving Target Ephemeris usin... 3 Case 3: Create a Moving Target Ephemeris using... None
38 -1 0 [Resources] 2 Resources None
40 -1 0 [Citations] 2 Citations None
42 -1 0 [About This Notebook] 2 About This Notebook None

155 rows × 4 columns

33
    df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ))
1 output.
nb title
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Generating Cubes and Cutouts from TESS FFIs
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Beginner: Searching MAST using astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Beginner: Zcut and Astroquery Tutorial
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'b0... Large Downloads in astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb {'cells': [{'cell_type': 'markdown', 'id': '21... Historical Quasar Observations
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'ba... Wildcard Handling with Astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Catalog of Variables Notebook (API vers...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Source Catalog API Notebook
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Source Catalog API Notebook: SMC Color-...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... MAST Table Access Protocol Hubble Source Catal...
34
    df = df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ).rename("title"))
35
    df = df.assign(description=html.soup.methodcaller("select_one", "p").dropna().attrgetter("text").groupby(
        "file"
    ).apply(lambda x: x.sort_index().iloc[0]).rename("description").reindex(df.index))
36
    df.apply(
        lambda x: (
            x.title and x.loc["nb"].metadata.setdefault("title", x.title),
            x.description and x.loc["nb"].metadata.setdefault("description", x.description)
        ), axis=1
    );
37

make a table of contents (details > nav > ol) for a dataframe of headings

38
    def make_toc(df):
        """build a nested nav > ol table of contents from a dataframe of headings"""
        toc = bs4.BeautifulSoup(features="lxml")
        toc.append(nav := toc.new_tag("nav"))
        nav.append(ol := toc.new_tag("ol"))
        last_level = 1
        for _, row in df.iterrows():
            if not row.string:
                continue
            if row.level > last_level:
                # descend: open one nested list per missing level
                for _ in range(last_level, row.level):
                    ol.append(li := toc.new_tag("li"))
                    li.append(ol := toc.new_tag("ol"))
            else:
                # ascend: climb one nested list per level difference
                for _ in range(row.level, last_level):
                    if ol.parent and ol.parent.parent:
                        ol = ol.parent.parent
            last_level = row.level
            ol.append(li := toc.new_tag("li"))
            li.append(a := toc.new_tag("a"))
            a.append(row.string)
            a.attrs.update(href=F"#{row.id}")
        return toc
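
a quick check of the nesting behaviour on a hypothetical pair of headings: an h2 following an h1 should land in a nested list.

    sample = pandas.DataFrame([
        dict(string="Introduction", level=1, id="introduction"),
        dict(string="Setup", level=2, id="setup"),
    ])
    print(make_toc(sample))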
39

generate the table of contents for each file we have indexed

40
    df = df.assign(toc=h.groupby(h.index.get_level_values("file")).apply(make_toc).apply(str))
41

determine the location of the html version of the file.

42
    df = df.assign(target=(await (
        Config.target / df.index.apath.relative_to(Config.dir)
    ).apath().apath.with_suffix(".html").apath.absolute()).values)
44
    df = df.assign(**pandas.DataFrame([
            [None] + df.index.values[:-1].tolist(), df.index.values, df.index.values[1:].tolist() + [None]
    ], index=["prev", "file", "next"]).T.set_index("file"))
45
    def relative_path(source, target):
        """compute a relative path from source to target"""
        if target:
            common = 0
            if not source.is_absolute():
                source = pathlib.Path(source).absolute()
            if not target.is_absolute():
                target = pathlib.Path(target).absolute()
            # find the index of the first differing path component
            for common, (s, t) in enumerate(zip(source.parts, target.parts)):
                if s != t: break
            # climb out of the source file's directory, then descend into the target
            return type(source)(*[".."]*(len(source.parents)-common), *target.parts[common:])
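
for example, linking between two rendered notebooks in sibling directories (paths are illustrative):

    relative_path(
        pathlib.Path("/site/notebooks/astroquery/beginner_search/beginner_search.html"),
        pathlib.Path("/site/notebooks/HSC/HSC_TAP/HSC_TAP.html"),
    )  # PosixPath('../../HSC/HSC_TAP/HSC_TAP.html')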
46

generate the footer that contains the previous and next links

47
    df = df.assign(
        footer = df.apply(
                lambda s: (s.prev and F"""<a href="{relative_path(s.target, df.loc[s.prev].target)}" rel="prev"><span aria-hidden="true">&lt;</span>{df.loc[s.prev].title}</a><br/>""" or "")
                + (s.next and F"""<a href="{relative_path(s.target, df.loc[s.next].target)}" rel="next">{df.loc[s.next].title} <span aria-hidden="true">&gt;</span></a><br/>""" or ""),
                axis=1
        )
    )
48
    me = """<p>mast notebooks</p>"""
49
    df = df.assign(
        header = df.apply(
        lambda s: me + "<details><summary>site navigation</summary><nav><ol>%s</ol></nav></details>"% "".join(
            F"""<li><a href="{relative_path(s.target, t.target)}">{t.title}</a></li>"""
            for i, t in df.iterrows()
        ), axis=1
    ))
50
    await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);
    
51
    df["html"] = await df[["nb"]].apply(
        lambda s: from_notebook_node(s["nb"], dict(toc=df.toc.loc[s.name], footer=df.loc[s.name].footer, header=df.loc[s.name].header)), axis=1).gather()
52

create an environment.yml file from the version information previously collected

53

example outputs with accessible reprs

54
    if 0 or FILE:
        await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);
        await df.apply(
            lambda s: print(F"""writing {s.target.as_uri()}""") or s.target.write_text(str(s.loc["html"])), axis=1
        ).gather()
    else:
        df.html.head(2).display.iframe().display()

    
2 outputs.