
@tonyfast's notebooks

notebook summary
title
revisit building the mast notebooks
description
combine the toc/execution work from the mast notebooks with site aggregation work.
cells
54 total
39 code
state
executed in order
kernel
Python [conda env:p311] *
language
python
name
conda-env-p311-py
lines of code
247
outputs
8
cell ordering
1

revisit building the mast notebooks

combine the toc/execution work from the mast notebooks with site aggregation work.

in the example, we hot patch reprs to make them accessible.
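
a minimal sketch of the idea (not the notebook's actual mechanism): a singledispatch registry maps types to accessible html renderers, the same register pattern repr_semantic uses later in this notebook.

    import functools, pandas

    @functools.singledispatch
    def accessible_repr(obj):
        # fall back to the plain text repr wrapped in a pre element
        return F"<pre>{obj!r}</pre>"

    @accessible_repr.register(pandas.DataFrame)
    def _(df):
        # pandas already emits th header cells that screen readers can announce
        return df.to_html(border=0)

    accessible_repr(pandas.DataFrame({"a": [1, 2]}))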

2
    import tonyfast.utils, pandas, json, nbconvert, inspect, textwrap, nbclient, nbformat, operator, bs4, anyio, pathlib, re, os, traitlets
    __import__("nest_asyncio").apply()
3
    if MAIN := __name__ == "__main__":
        class Config:
            dir = pathlib.Path("mast_notebooks").absolute()
            paths = ["mast_notebooks"]
            target = pathlib.Path("mast_out").absolute()
            exporter = nbconvert.get_exporter("a11y")(
                exclude_input_prompt=True, 
                include_sa11y=False,
                exclude_output_prompt=True,
                hide_anchor_links=True,
                include_settings=True,
                exclude_anchor_links=True,
                embed_images=True,
                validate_nb=False,
                include_visibility=True
            )
    FILE = globals().get("__file_") or os.environ.get("WRITE")
    async def from_notebook_node(nb, resources=None, exporter=Config.exporter):
        return exporter.from_notebook_node(nb, resources=resources)[0]
4

load in all the notebooks

find all the notebooks resembling a post. we skip the indexes and readmes for now; they use different exporter configurations than content notebooks.
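
the flattening below assumes the standard jupyter book table of contents layout: parts hold chapters, chapters may hold sections, and every entry names a file. a sketch of the shape (captions and paths are illustrative):

    toc_shape = {
        "format": "jb-book",
        "root": "index",
        "parts": [
            {"caption": "astroquery", "chapters": [
                {"file": "notebooks/astroquery/beginner_search/beginner_search"},
                {"file": "notebooks/astroquery/large_downloads/large_downloads", "sections": [
                    {"file": "notebooks/astroquery/wildcard_searches/wildcard_searches"},
                ]},
            ]},
        ],
    }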

5
    toc = (
        await pandas.Index([Config.dir / "_toc.yml"], name="path").apath().apath.load()
    ).series()

    config = (
        await pandas.Index([Config.dir / "_config.yml"], name="path").apath().apath.load()
    ).series().T.iloc[:,0]
6
    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("section").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="section"), append=True)
    )
7
    chapters = toc.parts.enumerate("chapter").series()
    sections = chapters.chapters.enumerate("section").series()   
    files = sections.sections.dropna().enumerate("subsection").series().combine_first(
        sections[["file"]].set_index(pandas.Index([0]*len(sections), name="subsection"), append=True)
    )
    paths = ("mast_notebooks" / files.file.apath())
    print(F"{(~paths.path().path.exists()).sum()} files missing")
    paths = (await paths[await paths.apath().apath.exists()].apath.absolute()).pipe(pandas.Index)
1 output.
4 files missing

8
    df = paths.to_series()
    df = df.loc[~df.astype(str).str.contains("checkpoint")].pipe(pandas.Index).rename("file")
    df = pandas.DataFrame(index=df[df.apath.suffix.eq(".ipynb")])
9
    dependencies = await (await (
        pandas.Index(["mast_notebooks/"]).apath().apath.rglob("requirements.txt")
    )).pipe(pandas.Index).apath.read_text()
    versions = dependencies.apply(str.splitlines).explode().str.extract(
        "^(?P<package>[a-z|A-Z|_|-|0-9]+)\s*(?P<constraint>[\>|\<|=]*)?\s*(?P<version>\S*)?"
    )
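
each requirements line splits into a package name, an optional constraint, and an optional version. a worked example of the pattern above on a single, illustrative line:

    m = re.match(
        r"^(?P<package>[A-Za-z0-9_-]+)\s*(?P<constraint>[><=]*)?\s*(?P<version>\S*)?",
        "astroquery>=0.4.6",
    )
    assert m.groupdict() == {"package": "astroquery", "constraint": ">=", "version": "0.4.6"}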
10
    
    import yaml; from pathlib import Path
    deps = versions.package.dropna().drop_duplicates().tolist()
    deps = [{"git": "GitPython"}.get(x,x) for x in deps ]
    Path("environment.yml").write_text(yaml.safe_dump(dict(
        name="mast_notebooks",
        channels=["conda-forge"],
        dependencies=["python=3.11", "pip", dict(
            pip=deps+ ["ipykernel", "astrocut", "lxml"]
        )]
    )))
1 output.
428
11
%%bash
mamba env create -p .mast_nb -f environment.yml
mamba run -p .mast_nb --live-stream pip install -e ../../../nbconvert-a11y
# mamba update -p.mast_nb -f environment.yml --force-reinstall
12
%%bash
mamba run -p .mast_nb python -m ipykernel install --user --name mast_nb
13
    df = (await df.index.apath.read_text()).apply(json.loads)\
    .rename("nb").apply(nbformat.from_dict).pipe(df.join)
    if not FILE:
        df = df.head(10)
    df
1 output.
nb
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'b0...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb {'cells': [{'cell_type': 'markdown', 'id': '21...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'ba...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb {'cells': [{'cell_type': 'markdown', 'metadata...
14
    import nbclient
15

the notebooks require preparation before they can be transformed to html

16
    MIDGY = re.compile(r"^%%\s*[pm]idgy")

    def prepare_cell(cell):
        """make inplace changes to the notebook that carry through the publishing process"""
        cell.source = "".join(cell.source)
        if MIDGY.match(cell.source):
            cell.metadata.setdefault("jupyter", {})["source_hidden"] = True
        for out in cell.get("outputs", ""):
            for k, v in out.get("data", {}).items():
                if k.startswith("text"):
                    out["data"][k] = "".join(v)
            if "text" in out:
                out.text = "".join(out.text)
        return cell
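
a minimal sketch of what prepare_cell normalizes, using a hypothetical cell dict: list sources and outputs become single strings, and pidgy/midgy cells get their input hidden.

    cell = nbformat.from_dict({
        "cell_type": "code", "metadata": {}, "execution_count": None,
        "source": ["%%pidgy\n", "some prose\n"],
        "outputs": [{"output_type": "stream", "name": "stdout", "text": ["a\n", "b\n"]}],
    })
    prepare_cell(cell)
    assert cell.source == "%%pidgy\nsome prose\n"
    assert cell.outputs[0].text == "a\nb\n"
    assert cell.metadata["jupyter"]["source_hidden"]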
17
    cells = df.nb.itemgetter("cells").enumerate("index").apply(prepare_cell).series()
    code = cells.loc[cells.cell_type.eq("code"), :]
    _idgy = code[code.source.str.contains(r"\s*%(?:re)?load_ext\s+[pm]idgy")]
18
    df.loc[_idgy.index.get_level_values(0).drop_duplicates()].apply(
        lambda x: [
            y["metadata"].setdefault("jupyter", {}).setdefault("source_hidden", True)
            for y in x.loc["nb"]["cells"] if y["cell_type"] == "code"
        ] and None, axis=1 
    );
19
    def define_table_repr():
        %load_ext nbconvert_a11y.outputs
        %load_ext nbconvert_a11y.tables
        import astropy.table
        from nbconvert_a11y.outputs import BeautifulSoup
        from nbconvert_a11y.tables import repr_semantic, get_table, SHOW_INDEX, repr_semantic_update
        @repr_semantic.register(astropy.table.Table)
        def repr_astropy_table(table, *args, **kwargs):
            return get_table(table.to_pandas(), BeautifulSoup(table._base_repr_(True)).i.text, type_=type(table), SEMANTIC=False, ROW_INDEX=SHOW_INDEX.hide)
        
        repr_semantic_update()
20
    def inject_a11y(nb, LINES = "".join(inspect.getsourcelines(define_table_repr)[0][1:])):
        if nb.cells[1].get("source") != LINES:
            nb.cells.insert(1, nbformat.v4.new_code_cell(LINES, metadata=dict(jupyter=dict(source_hidden=True))))
        return nb
21
    df.nb = df.nb.apply(nbformat.from_dict).apply(inject_a11y)
22
    client = df.nb.apply(
        nbclient.NotebookClient, kernel_name="mast_nb", allow_errors=True
    )

    df.nb = (
        await client.head(3).apply(nbclient.NotebookClient.async_execute).gather()
    ).combine_first(df.nb)
23
    def render_markdown_output(output):
        if "data" in output:
            if "text/markdown" in output["data"]:
                md = Config.exporter.environment.globals["markdown"](output["data"]["text/markdown"])
                output["data"]["text/html"] = md
                return md
24
    outputs = cells.outputs.dropna().enumerate("output").dropna()
    outputs.apply(render_markdown_output);
    markdowns = cells[cells.cell_type.eq("markdown")].apply(
        lambda s: operator.setitem(
            s.metadata.setdefault("data", {}),
            "text/html",
            html := Config.exporter.environment.filters["markdown2html"](dict(cell=s), s.source),
        )
        or html,
        axis=1,
    ).to_frame("html").assign(output=-1).set_index("output", append=True)
25

create intermediate representations of the markdown. handling this work before templating lets us use partial information from the outcome to build the table of contents and relative links.

26
    html = pandas.concat(
        [
            markdowns,
            outputs.itemgetter("data").dropna().itemgetter("text/html").dropna().to_frame("html"),
            
        ]
    ).sort_index()

    html["soup"] = html.html.apply(bs4.BeautifulSoup, features="lxml")
27

extract the headings from each cell

28
    html["h"] = html.soup.methodcaller("select", "h1,h2,h3,h4,h5,h6")
    h = html.h.enumerate("h").dropna()
29

expand the headings into features on the dataframe

30
    h = h.to_frame("h").assign(
        level=h.attrgetter("name").str.lstrip("h").astype(int),
        string=h.attrgetter("text").str.rstrip("¶"),
        id=h.attrgetter("attrs").itemgetter("id")
    ); h.head()
1 output.
h level string id
file index output h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb 0 -1 0 [Hubble Catalog of Variables Notebook (API ver... 1 Hubble Catalog of Variables Notebook (API vers... None
1 [2019 - 2022, Rick White, Steve Lubow, Trenton... 3 2019 - 2022, Rick White, Steve Lubow, Trenton ... None
1 -1 0 [Instructions] 1 Instructions None
1 [Table of Contents] 1 Table of Contents None
2 -1 0 [Initialization , []] 1 Initialization None
31

extract the document title from the headings. we should probably extract a description too; adding a description to the meta is good for accessibility when choosing tabs.

32
    h
1 output.
h level string id
file index output h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb 0 -1 0 [Hubble Catalog of Variables Notebook (API ver... 1 Hubble Catalog of Variables Notebook (API vers... None
1 [2019 - 2022, Rick White, Steve Lubow, Trenton... 3 2019 - 2022, Rick White, Steve Lubow, Trenton ... None
1 -1 0 [Instructions] 1 Instructions None
1 [Table of Contents] 1 Table of Contents None
2 -1 0 [Initialization , []] 1 Initialization None
... ... ... ... ... ... ... ...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb 21 -1 0 [Case 2: Wildcard Search with , [instrument_na... 3 Case 2: Wildcard Search with instrument_name a... None
24 -1 0 [Case 3: Create a Moving Target Ephemeris usin... 3 Case 3: Create a Moving Target Ephemeris using... None
38 -1 0 [Resources] 2 Resources None
40 -1 0 [Citations] 2 Citations None
42 -1 0 [About This Notebook] 2 About This Notebook None

155 rows × 4 columns

33
    df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ))
1 output.
nb title
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astrocut/making_tess_cubes_and_cutouts/making_tess_cubes_and_cutouts.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Generating Cubes and Cutouts from TESS FFIs
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_search/beginner_search.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Beginner: Searching MAST using astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/beginner_zcut/beginner_zcut.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Beginner: Zcut and Astroquery Tutorial
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/large_downloads/large_downloads.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'b0... Large Downloads in astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/historic_quasar_observations/historic_quasar_observations.ipynb {'cells': [{'cell_type': 'markdown', 'id': '21... Historical Quasar Observations
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/astroquery/wildcard_searches/wildcard_searches.ipynb {'cells': [{'cell_type': 'markdown', 'id': 'ba... Wildcard Handling with Astroquery.mast
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HCV_API/HCV_API_demo.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Catalog of Variables Notebook (API vers...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_API/hscv3_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Source Catalog API Notebook
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSCV3_SMC_API/hscv3_smc_api.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... Hubble Source Catalog API Notebook: SMC Color-...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/mast_notebooks/notebooks/HSC/HSC_TAP/HSC_TAP.ipynb {'cells': [{'cell_type': 'markdown', 'metadata... MAST Table Access Protocol Hubble Source Catal...
34
    df = df.assign(title=h.groupby(h.index.get_level_values("file")).apply(
        lambda s: s.sort_values("level").string.iloc[0]
    ).rename("title"))
35
    df = df.assign(description=html.soup.methodcaller("select_one", "p").dropna().attrgetter("text").groupby(
        "file"
    ).apply(lambda x: x.sort_index().iloc[0]).rename("description").reindex(df.index))
36
    df.apply(
        lambda x: (
            x.title and x.loc["nb"].metadata.setdefault("title", x.title),
            x.description and x.loc["nb"].metadata.setdefault("description", x.description)
        ), axis=1
    );
37

make a table of contents (details > nav > ol) for a dataframe of headings

38
    def make_toc(df):
        """build a nested nav > ol table of contents from a dataframe of headings"""
        toc = bs4.BeautifulSoup(features="lxml")
        toc.append(nav := toc.new_tag("nav"))
        nav.append(ol := toc.new_tag("ol"))
        last_level = 1
        for _, row in df.iterrows():
            if not row.string:
                continue
            if row.level > last_level:
                # descend: open one nested list per missing level
                for _ in range(last_level, row.level):
                    ol.append(li := toc.new_tag("li"))
                    li.append(ol := toc.new_tag("ol"))
            else:
                # ascend: climb one nested list per level difference
                for _ in range(row.level, last_level):
                    if ol.parent and ol.parent.parent:
                        ol = ol.parent.parent
            last_level = row.level
            ol.append(li := toc.new_tag("li"))
            li.append(a := toc.new_tag("a"))
            a.append(row.string)
            a.attrs.update(href=F"#{row.id}")
        return toc
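
a quick check of the nesting behaviour on a hypothetical pair of headings: an h2 following an h1 should land in a nested list.

    sample = pandas.DataFrame([
        dict(string="Introduction", level=1, id="introduction"),
        dict(string="Setup", level=2, id="setup"),
    ])
    print(make_toc(sample))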
39

generate the table of contents for each file we have indexed

40
    df = df.assign(toc=h.groupby(h.index.get_level_values("file")).apply(make_toc).apply(str))
41

determine the location of the html version of the file.

42
    df = df.assign(target=(await (
        Config.target / df.index.apath.relative_to(Config.dir)
    ).apath().apath.with_suffix(".html").apath.absolute()).values)
44
    df = df.assign(**pandas.DataFrame([
            [None] + df.index.values[:-1].tolist(), df.index.values, df.index.values[1:].tolist() + [None]
    ], index=["prev", "file", "next"]).T.set_index("file"))
45
    def relative_path(source, target):
        """compute a relative path from source to target"""
        if target:
            common = 0
            if not source.is_absolute():
                source = pathlib.Path(source).absolute()
            if not target.is_absolute():
                target = pathlib.Path(target).absolute()
            # find the index of the first differing path component
            for common, (s, t) in enumerate(zip(source.parts, target.parts)):
                if s != t: break
            # climb out of the source file's directory, then descend into the target
            return type(source)(*[".."]*(len(source.parents)-common), *target.parts[common:])
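
for example, linking between two rendered notebooks in sibling directories (paths are illustrative):

    relative_path(
        pathlib.Path("/site/notebooks/astroquery/beginner_search/beginner_search.html"),
        pathlib.Path("/site/notebooks/HSC/HSC_TAP/HSC_TAP.html"),
    )  # PosixPath('../../HSC/HSC_TAP/HSC_TAP.html')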
46

generate the footer that contains the previous and next links

47
    df = df.assign(
        footer = df.apply(
                lambda s: (s.prev and F"""<a href="{relative_path(s.target, df.loc[s.prev].target)}" rel="prev"><span aria-hidden="true">&lt;</span>{df.loc[s.prev].title}</a><br/>""" or "")
                + (s.next and F"""<a href="{relative_path(s.target, df.loc[s.next].target)}" rel="next">{df.loc[s.next].title} <span aria-hidden="true">&gt;</span></a><br/>""" or ""),
                axis=1
        )
    )
48
    me = """<p>mast notebooks</p>"""
49
    df = df.assign(
        header = df.apply(
        lambda s: me + "<details><summary>site navigation</summary><nav><ol>%s</ol></nav></details>"% "".join(
            F"""<li><a href="{relative_path(s.target, t.target)}">{t.title}</a></li>"""
            for i, t in df.iterrows()
        ), axis=1
    ))
50
    await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);
    
51
    df["html"] = await df[["nb"]].apply(
        lambda s: from_notebook_node(s["nb"], dict(toc=df.toc.loc[s.name], footer=df.loc[s.name].footer, header=df.loc[s.name].header)), axis=1).gather()
52

create an environment.yml file from the version information previously collected

53

example outputs with accessible reprs

54
    if 0 or FILE:
        await df.target.apath.parent.drop_duplicates().apath.mkdir(exist_ok=True, parents=True);
        await df.apply(
            lambda s: print(F"""writing {s.target.as_uri()}""") or s.target.write_text(str(s.loc["html"])), axis=1
        ).gather()
    else:
        df.html.head(2).display.iframe().display()

    
2 outputs.