index
execution_count
cell_type
toolbar
started_at
completed_at
source
loc
metadata
outputs
1
unexecuted
In
[
]
markdown
# build the site docs
building documentation with `nbconvert` and dataframes. this approach treats documents as data, using
`nbformat` as the primary interface for multimedia content.
the following is an example rendering https://tonyfast.github.io/tonyfast/draft/tonyfast/tonyfast/tonyfast/xxiii/2023-12-13-jupyter-community-call.html . something got wonky with the paths.
metadata
6
building documentation with
nbconvert
and dataframes. this approach treats documents as data, using
nbformat
as the primary interface for multimedia content.
the following is an example rendering
https://tonyfast.github.io/tonyfast/draft/tonyfast/tonyfast/tonyfast/xxiii/2023-12-13-jupyter-community-call.html
. something got wonky with the paths.
2
executed
In
[
1
]
code
import tonyfast.utils , pandas , json , nbconvert , nbformat , operator , bs4 , anyio , pathlib , re , os
__import__ ( "nest_asyncio" ) . apply ()
metadata
2
0 outputs.
Out
[
1
]
3
executed
In
[
2
]
code
if MAIN := __name__ == "__main__":
    # NOTE(review): the cell's indentation was lost in extraction. Everything is
    # nested under the guard here because ``from_notebook_node`` needs ``Config``
    # when it is defined -- confirm against the original notebook.

    class Config:
        """Build settings shared by the rest of the notebook."""

        # Repository root, three levels above this notebook.  When ``__file__``
        # is not a global (as is usual in an interactive kernel) fall back to
        # the notebook's name resolved against the current working directory.
        dir = pathlib.Path(
            globals().get("__file__") or "2024-02-21-build-docs-pd.ipynb"
        ).absolute().parent.parent.parent
        # directories (relative to ``dir``) searched for notebooks
        paths = ["tonyfast"]
        # output directory for the rendered html
        target = pathlib.Path("site/draft")
        # the nbconvert exporter registered as "a11y", configured for posts
        exporter = nbconvert.get_exporter("a11y")(
            exclude_input_prompt=True,
            include_sa11y=True,
            exclude_output_prompt=True,
            hide_anchor_links=True,
            include_settings=True,
            exclude_anchor_links=True,
            embed_images=True,
            validate_nb=False,
            include_visibility=True,
        )

    # Truthy when the build should write files to disk.
    # NOTE(review): "__file_" (one trailing underscore) is never a module global,
    # so in practice this is just the WRITE environment variable.  Left as is
    # because correcting the name would switch on the disk-writing branches
    # whenever the notebook runs from a file -- confirm the intent.
    FILE = globals().get("__file_") or os.environ.get("WRITE")

    async def from_notebook_node(nb, resources=None, exporter=Config.exporter):
        """Export ``nb`` with ``exporter`` and return the first item of its result.

        The coroutine contains no ``await``; it is declared ``async`` so callers
        can collect many exports through an awaitable (``.gather()``).
        """
        return exporter.from_notebook_node(nb, resources=resources)[0]
metadata
19
0 outputs.
Out
[
2
]
4
unexecuted
In
[
]
markdown
load in all the notebooks
find all the notebooks resembling a post.
we are skipping the work needing to be done on the indexes and readmes.
indexes and readmes use different exporter configurations than content notebooks.
metadata
5
load in all the notebooks
find all the notebooks resembling a post.
we are skipping the work needing to be done on the indexes and readmes.
indexes and readmes use different exporter configurations than content notebooks.
5
executed
In
[
3
]
code
# Collect, under every configured path, each notebook whose file name starts
# with four digits and a dash, and keep the result as a Series of files.
# NOTE(review): `.path()`, `.glob()` and `.apath()` are not pandas built-ins;
# presumably accessors registered by `tonyfast.utils` -- verify.
df = (
Config . dir / pandas . Index ( Config . paths )
) . rename ( "directory" ) . path (
) . glob ( "**/[0-9][0-9][0-9][0-9]-*.ipynb" , recursive = True ) . apath () . pipe (
pandas . Index , name = "files"
) . to_series ()
metadata
6
0 outputs.
Out
[
3
]
6
unexecuted
In
[
]
markdown
metadata
1
remove checkpoint files.
7
executed
In
[
4
]
code
# Drop every path whose string form contains "checkpoint", then turn the
# remaining values into an Index named "file".
df = df . loc [ ~ df . astype ( str ) . str . contains ( "checkpoint" )] . pipe ( pandas . Index ) . rename ( "file" )
metadata
1
0 outputs.
Out
[
4
]
8
unexecuted
In
[
]
markdown
extract the date from the title. this can later be enriched with git information
metadata
1
extract the date from the title. this can later be enriched with git information
9
executed
In
[
5
]
code
# File stems look like ``YYYY-MM-DD-slug``; every part is captured as a named
# group.  Raw string with the ``{n}`` quantifiers attached directly to their
# character classes (stray spaces there would demand literal spaces).
TITLE = r"(?P<year>[0-9]{4})-(?P<month>[0-9]{2})-(?P<day>[0-9]{1,2})-(?P<slug>.+)"
# Split each file stem into the named groups of TITLE, index the result by the
# file paths, and drop rows where nothing matched (all values missing).
# NOTE(review): `.apath.stem` is presumably a tonyfast.utils accessor -- verify.
df = df . apath . stem . str . extract ( TITLE ) . set_index ( df ) . dropna ( how = "all" )
# Join the captured parts into "year-month-day" and parse it as a timestamp.
df [ "date" ] = pandas . to_datetime ( df . year + "-" + df . month + "-" + df . day )
# Newest posts first.
df = df . sort_values ( "date" , ascending = False )
metadata
4
0 outputs.
Out
[
5
]
10
unexecuted
In
[
]
markdown
read in all the notebooks
metadata
1
read in all the notebooks
11
executed
In
[
6
]
code
# Read the text of every indexed file, parse it as JSON, wrap it with
# nbformat.from_dict and join it onto df as the "nb" column.
# NOTE(review): `.apath.read_text()` is awaited, so it is presumably an async
# accessor from tonyfast.utils -- verify.
df = ( await df . index . apath . read_text ()) . apply ( json . loads ) \
. rename ( "nb" ) . apply ( nbformat . from_dict ) . pipe ( df . join )
# When FILE is falsy only the first ten rows are kept.
if not FILE :
df = df . head ( 10 )
df
metadata
5
1 outputs.
Out
[
6
]
year
month
day
slug
date
nb
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-15-screen-tests.ipynb
2024
03
15
screen-tests
2024-03-15
{'cells': [{'attachments': {}, 'cell_type': 'm...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-13-notebooks-for-all-march-call.ipynb
2024
03
13
notebooks-for-all-march-call
2024-03-13
{'cells': [{'cell_type': 'markdown', 'id': '11...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-12-nbconvert-a11y-pres.ipynb
2024
03
12
nbconvert-a11y-pres
2024-03-12
{'cells': [{'cell_type': 'markdown', 'id': '39...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-11-NOA.ipynb
2024
03
11
NOA
2024-03-11
{'cells': [{'attachments': {'8ea12e8f-45e6-4ea...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-06-mast-revisit.ipynb
2024
03
06
mast-revisit
2024-03-06
{'cells': [{'cell_type': 'markdown', 'id': '55...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-05-traceback-figure-tables.ipynb.ipynb
2024
03
05
traceback-figure-tables.ipynb
2024-03-05
{'cells': [{'cell_type': 'markdown', 'id': '93...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-05-astropy-table-repr.ipynb
2024
03
05
astropy-table-repr
2024-03-05
{'cells': [{'attachments': {'edb162e8-136d-437...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-04-semantic-outputs.ipynb
2024
03
04
semantic-outputs
2024-03-04
{'cells': [{'cell_type': 'markdown', 'id': '25...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-a11y-list-string.ipynb.ipynb
2024
03
01
a11y-list-string.ipynb
2024-03-01
{'cells': [{'attachments': {'931392ee-8c73-455...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-notebook-accessibility-workshop.ipynb
2024
03
01
notebook-accessibility-workshop
2024-03-01
{'cells': [{'cell_type': 'markdown', 'id': '34...
12
unexecuted
In
[
]
markdown
the notebooks require preparation before they can transform to html
metadata
1
the notebooks require preparation before they can transform to html
13
executed
In
[
7
]
code
# Matches a source that starts with ``%%`` followed by whitespace, ``pidgy`` or
# ``midgy``.  Written as an alternation: inside ``[...]`` the words would be
# read as a set of single characters and match unrelated magics as well.
MIDGY = re.compile(r"^%%(?:\s|pidgy|midgy)")


def prepare_cell(cell):
    """make inplace changes to the notebook that carried through the publishing process

    * ``cell.source`` is joined into one string.
    * cells whose source matches ``MIDGY`` get
      ``metadata["jupyter"]["source_hidden"] = True``.
    * in every output, ``data`` entries whose key starts with ``"text"`` and
      the ``text`` field (when present) are joined into one string.

    Returns the same ``cell`` object, modified in place.
    """
    cell.source = "".join(cell.source)
    if MIDGY.match(cell.source):
        cell.metadata.setdefault("jupyter", {})["source_hidden"] = True
    for out in cell.get("outputs", ()):
        data = out.get("data", {})
        for key, value in data.items():
            # only existing keys are reassigned, so iterating stays safe
            if key.startswith("text"):
                data[key] = "".join(value)
        if "text" in out:
            out.text = "".join(out.text)
    return cell
metadata
11
0 outputs.
Out
[
7
]
14
executed
In
[
8
]
code
# One row per notebook cell, each passed through prepare_cell.
# NOTE(review): `.itemgetter`, `.enumerate` and `.series()` are presumably
# tonyfast.utils accessors -- verify.
cells = df.nb.itemgetter("cells").enumerate("index").apply(prepare_cell).series()
code = cells.loc[cells.cell_type.eq("code"), :]
# Code cells that load the pidgy/midgy extension.  The "re" prefix is optional
# so that both ``%load_ext`` and ``%reload_ext`` are found.
_idgy = code[code.source.str.contains(r"\s*%(?:re)?load_ext\s+[pm]idgy")]
metadata
3
0 outputs.
Out
[
8
]
15
executed
In
[
9
]
code
# For every notebook that owns at least one row of ``_idgy``, mark each code
# cell as source-hidden unless the cell already carries a value for that flag.
_flagged = _idgy.index.get_level_values(0).drop_duplicates()
for _notebook in df.loc[_flagged, "nb"]:
    for _cell in _notebook["cells"]:
        if _cell["cell_type"] == "code":
            _cell["metadata"].setdefault("jupyter", {}).setdefault("source_hidden", True)
metadata
6
0 outputs.
Out
[
9
]
16
executed
In
[
10
]
code
def render_markdown_output(output):
    """Render an output's ``text/markdown`` payload to html.

    When ``output["data"]`` holds a ``"text/markdown"`` entry it is passed to
    the ``markdown`` global of the exporter's template environment; the result
    is stored under ``"text/html"`` in the same bundle and returned.  In every
    other case nothing is changed and ``None`` is returned.
    """
    bundle = output["data"] if "data" in output else {}
    if "text/markdown" not in bundle:
        return None
    rendered = Config.exporter.environment.globals["markdown"](bundle["text/markdown"])
    bundle["text/html"] = rendered
    return rendered
metadata
7
0 outputs.
Out
[
10
]
17
executed
In
[
11
]
code
# One row per cell output (cells without outputs are dropped first).
# NOTE(review): `.enumerate("output")` is presumably a tonyfast.utils accessor -- verify.
outputs = cells . outputs . dropna () . enumerate ( "output" ) . dropna ()
# Adds a "text/html" rendering to markdown outputs in place; the result is discarded.
outputs . apply ( render_markdown_output );
# Render each markdown cell with the exporter's "markdown2html" filter and store
# the html on the cell under metadata["data"]["text/html"].  operator.setitem
# returns None, so `or html` makes the lambda yield the rendered html.
# The rows get output=-1 appended to their index.
markdowns = cells [ cells . cell_type . eq ( "markdown" )] . apply (
lambda s : operator . setitem (
s . metadata . setdefault ( "data" , {}),
"text/html" ,
html := Config . exporter . environment . filters [ "markdown2html" ]( dict ( cell = s ), s . source ),
)
or html ,
axis = 1 ,
) . to_frame ( "html" ) . assign ( output =- 1 ) . set_index ( "output" , append = True )
metadata
11
0 outputs.
Out
[
11
]
18
unexecuted
In
[
]
markdown
create intermediate representations of markdown. when we handle this work before templating we can use partial information from the outcome
to build the table of contents and relative links.
metadata
2
create intermediate representations of markdown. when we handle this work before templating we can use partial information from the outcome
to build the table of contents and relative links.
19
executed
In
[
12
]
code
# Stack the rendered markdown cells with the "text/html" payloads of the cell
# outputs into a single "html" column, ordered by index.
# NOTE: from here on the name `html` refers to this frame.
html = pandas . concat (
[
markdowns ,
outputs . itemgetter ( "data" ) . dropna () . itemgetter ( "text/html" ) . dropna () . to_frame ( "html" ),
]
) . sort_index ()
# Parse every html fragment with BeautifulSoup using the lxml parser.
html [ "soup" ] = html . html . apply ( bs4 . BeautifulSoup , features = "lxml" )
metadata
9
0 outputs.
Out
[
12
]
20
unexecuted
In
[
]
markdown
extract the headings from each cell
metadata
1
extract the headings from each cell
21
executed
In
[
13
]
code
# Select all h1-h6 elements from each parsed fragment.
# NOTE(review): `.methodcaller` / `.enumerate` are presumably tonyfast.utils accessors -- verify.
html [ "h" ] = html . soup . methodcaller ( "select" , "h1,h2,h3,h4,h5,h6" )
# One row per heading element, with an extra "h" index level.
h = html . h . enumerate ( "h" ) . dropna ()
metadata
2
0 outputs.
Out
[
13
]
22
unexecuted
In
[
]
markdown
expand the headings into features on the dataframe
metadata
1
expand the headings into features on the dataframe
23
executed
In
[
14
]
code
# Expand each heading element into columns:
#   level  - the number after "h" in the tag name
#   string - the heading text with any trailing "¶" removed
#   id     - the element's "id" attribute (None in the sample output when absent)
h = h . to_frame ( "h" ) . assign (
level = h . attrgetter ( "name" ) . str . lstrip ( "h" ) . astype ( int ),
string = h . attrgetter ( "text" ) . str . rstrip ( "¶" ),
id = h . attrgetter ( "attrs" ) . itemgetter ( "id" )
); h . head ()
metadata
5
1 outputs.
Out
[
14
]
h
level
string
id
file
index
output
h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-a11y-list-string.ipynb.ipynb
0
-1
0
[using microdata and semantic html to represen...
1
using microdata and semantic html to represent...
None
4
0
0
[representing constants]
2
representing constants
representing-constants
5
0
0
[representing strings]
2
representing strings
representing-strings
6
0
0
[representing numbers]
2
representing numbers
representing-numbers
7
-1
0
[representing containers]
2
representing containers
None
24
unexecuted
In
[
]
markdown
extract the document title from the headings. we should probably extract a description too.
adding description to the meta is good for accessibility when choosing tabs.
metadata
2
extract the document title from the headings. we should probably extract a description too.
adding description to the meta is good for accessibility when choosing tabs.
25
executed
In
[
15
]
code
metadata
1
1 outputs.
Out
[
15
]
h
level
string
id
file
index
output
h
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-a11y-list-string.ipynb.ipynb
0
-1
0
[using microdata and semantic html to represen...
1
using microdata and semantic html to represent...
None
4
0
0
[representing constants]
2
representing constants
representing-constants
5
0
0
[representing strings]
2
representing strings
representing-strings
6
0
0
[representing numbers]
2
representing numbers
representing-numbers
7
-1
0
[representing containers]
2
representing containers
None
...
...
...
...
...
...
...
...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-13-notebooks-for-all-march-call.ipynb
0
-1
0
[notebooks for all community summary]
1
notebooks for all community summary
None
2
-1
0
[full video]
2
full video
None
4
-1
0
[clips]
2
clips
None
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-15-screen-tests.ipynb
0
-1
0
[revisiting nbviewer/nbconvert screen reader e...
1
revisiting nbviewer/nbconvert screen reader ex...
None
5
-1
0
[creating an accessible version of the document]
2
creating an accessible version of the document
None
78 rows × 4 columns
26
executed
In
[
16
]
code
# Preview only (the result is displayed, not stored): per file, take the text of
# the heading with the smallest level as the title.
df . assign ( title = h . groupby ( h . index . get_level_values ( "file" )) . apply (
lambda s : s . sort_values ( "level" ) . string . iloc [ 0 ]
))
metadata
4
1 outputs.
Out
[
16
]
year
month
day
slug
date
nb
title
file
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-15-screen-tests.ipynb
2024
03
15
screen-tests
2024-03-15
{'cells': [{'attachments': {}, 'cell_type': 'm...
revisiting nbviewer/nbconvert screen reader ex...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-13-notebooks-for-all-march-call.ipynb
2024
03
13
notebooks-for-all-march-call
2024-03-13
{'cells': [{'cell_type': 'markdown', 'id': '11...
notebooks for all community summary
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-12-nbconvert-a11y-pres.ipynb
2024
03
12
nbconvert-a11y-pres
2024-03-12
{'cells': [{'cell_type': 'markdown', 'id': '39...
semantically meaningful notebooks
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-11-NOA.ipynb
2024
03
11
NOA
2024-03-11
{'cells': [{'attachments': {'8ea12e8f-45e6-4ea...
Astronomy Notebooks for All
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-06-mast-revisit.ipynb
2024
03
06
mast-revisit
2024-03-06
{'cells': [{'cell_type': 'markdown', 'id': '55...
revisit building the mast notebooks
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-05-traceback-figure-tables.ipynb.ipynb
2024
03
05
traceback-figure-tables.ipynb
2024-03-05
{'cells': [{'cell_type': 'markdown', 'id': '93...
using tables to structure tracebacks
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-05-astropy-table-repr.ipynb
2024
03
05
astropy-table-repr
2024-03-05
{'cells': [{'attachments': {'edb162e8-136d-437...
astropy tables as semantic tables
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-04-semantic-outputs.ipynb
2024
03
04
semantic-outputs
2024-03-04
{'cells': [{'cell_type': 'markdown', 'id': '25...
improved output semantic for python objects
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-a11y-list-string.ipynb.ipynb
2024
03
01
a11y-list-string.ipynb
2024-03-01
{'cells': [{'attachments': {'931392ee-8c73-455...
using microdata and semantic html to represent...
/home/tbone/Documents/tonyfast/tonyfast/xxiv/2024-03-01-notebook-accessibility-workshop.ipynb
2024
03
01
notebook-accessibility-workshop
2024-03-01
{'cells': [{'cell_type': 'markdown', 'id': '34...
notebooks and accessibility workshop
27
executed
In
[
17
]
code
# Store the title: per file, the text of the heading with the smallest level.
df = df . assign ( title = h . groupby ( h . index . get_level_values ( "file" )) . apply (
lambda s : s . sort_values ( "level" ) . string . iloc [ 0 ]
) . rename ( "title" ))
metadata
3
0 outputs.
Out
[
17
]
28
executed
In
[
18
]
code
# Description: for each file, the text of the first <p> found (first by index
# order among the fragments that contain one), aligned to df's index so files
# without any paragraph get a missing value.
df = df . assign ( description = html . soup . methodcaller ( "select_one" , "p" ) . dropna () . attrgetter ( "text" ) . groupby (
"file"
) . apply ( lambda x : x . sort_index () . iloc [ 0 ]) . rename ( "description" ) . reindex ( df . index ))
metadata
3
0 outputs.
Out
[
18
]
29
executed
In
[
19
]
code
# Copy the extracted title and description into each notebook's metadata,
# without overwriting values the notebook already declares.  Only non-empty
# strings are stored: a missing title/description is NaN, which is truthy and
# would otherwise end up in the metadata.
for _, _row in df.iterrows():
    _meta = _row["nb"].metadata
    for _key in ("title", "description"):
        _value = _row[_key]
        if isinstance(_value, str) and _value:
            _meta.setdefault(_key, _value)
metadata
6
0 outputs.
Out
[
19
]
30
unexecuted
In
[
]
markdown
make a table of contents `details > nav > ol` for a dataframe
metadata
1
make a table of contents
details > nav > ol
for a dataframe
31
executed
In
[
20
]
code
def make_toc(df):
    """Build a nested ``nav > ol`` table of contents from a frame of headings.

    Each row of ``df`` supplies ``level`` (heading depth), ``string`` (heading
    text) and ``id`` (anchor target).  Rows with empty text are skipped.
    Returns the ``bs4.BeautifulSoup`` document that holds the ``nav`` element.
    """
    toc = bs4.BeautifulSoup(features="lxml")
    toc.append(nav := toc.new_tag("nav"))
    nav.append(ol := toc.new_tag("ol"))
    depth = 1  # nesting level of the list ``ol`` currently points at
    for _, row in df.iterrows():
        if not row.string:
            continue
        if row.level > depth:
            # descend: open one wrapper item + list per missing level
            while depth < row.level:
                depth += 1
                ol.append(li := toc.new_tag("li"))
                li.append(ol := toc.new_tag("ol"))
        else:
            # ascend, keeping ``depth`` in step so a later deeper heading is
            # nested again instead of landing beside its parent.
            # NOTE(review): a level-1 heading met while nested stays in the
            # second-level list rather than climbing back to the root list;
            # this keeps the existing output shape -- confirm it is wanted.
            floor = row.level if row.level > 1 else min(depth, 2)
            while depth > floor:
                if ol.parent and ol.parent.parent:
                    ol = ol.parent.parent
                depth -= 1
        ol.append(li := toc.new_tag("li"))
        li.append(a := toc.new_tag("a"))
        a.append(row.string)
        # headings without an id get no href rather than a dead "#None" link
        if row.id:
            a.attrs.update(href=f"#{row.id}")
    return toc
metadata
23
0 outputs.
Out
[
20
]
32
unexecuted
In
[
]
markdown
generate the table of contents for each file we have indexed
metadata
1
generate the table of contents for each file we have indexed
33
executed
In
[
21
]
code
# Build one table of contents per file from its headings and keep it as an html string.
df = df . assign ( toc = h . groupby ( h . index . get_level_values ( "file" )) . apply ( make_toc ) . apply ( str ))
metadata
1
0 outputs.
Out
[
21
]
34
unexecuted
In
[
]
markdown
determine the location of the html version of the file.
metadata
1
determine the location of the html version of the file.
35
executed
In
[
22
]
code
# Output location of each post: its path relative to Config.dir, placed under
# Config.target, with the suffix replaced by ".html" and made absolute.
# NOTE(review): the `.apath` accessor chain is awaited, so it is presumably
# async and provided by tonyfast.utils -- verify.
df = df . assign ( target = ( await (
Config . target / df . index . apath . relative_to ( Config . dir )
) . apath () . apath . with_suffix ( ".html" ) . apath . absolute ()) . values )
metadata
3
0 outputs.
Out
[
22
]
36
executed
In
[
23
]
code
# Neighbouring posts in the current row order: "prev" is the file of the row
# above and "next" the file of the row below; the first row has prev=None and
# the last row has next=None.
df = df . assign ( ** pandas . DataFrame ([
[ None ] + df . index . values [: - 1 ] . tolist (), df . index . values , df . index . values [ 1 :] . tolist () + [ None ]
], index = [ "prev" , "file" , "next" ]) . T . set_index ( "file" ))
metadata
3
0 outputs.
Out
[
23
]
37
executed
In
[
24
]
code
def relative_path(source, target):
    """compute a relative path from source to target

    ``source`` is the file that will contain the link and ``target`` the file
    being linked to; the result is relative to the directory of ``source``.
    Relative inputs are first resolved against the current working directory.

    Returns a path of the same type as ``source``, or ``None`` when ``target``
    is falsy.
    """
    if not target:
        return None
    if not source.is_absolute():
        source = pathlib.Path(source).absolute()
    if not target.is_absolute():
        target = pathlib.Path(target).absolute()
    # Links resolve against the directory holding ``source``, so that directory
    # (not the target's depth) decides how many ".." steps are needed.
    start = source.parent.parts
    shared = 0
    for ours, theirs in zip(start, target.parts):
        if ours != theirs:
            break
        shared += 1
    climbs = [".."] * (len(start) - shared)
    return type(source)(*climbs, *target.parts[shared:])
metadata
11
0 outputs.
Out
[
24
]
38
unexecuted
In
[
]
markdown
generate the footer that contains the previous and next links
metadata
1
generate the footer that contains the previous and next links
39
executed
In
[
25
]
code
# Footer html for every page: a rel="prev" link and a rel="next" link to the
# neighbouring posts (each omitted when there is no neighbour).  The decorative
# arrows are hidden from assistive technology and written as entities so they
# cannot be read as markup.
df = df.assign(
    footer=df.apply(
        lambda s: (
            s.prev
            and f"""<a href="{relative_path(s.target, df.loc[s.prev].target)}" rel="prev"><span aria-hidden="true">&lt;</span> {df.loc[s.prev].title}</a><br/>"""
            or ""
        )
        + (
            s.next
            and f"""<a href="{relative_path(s.target, df.loc[s.next].target)}" rel="next">{df.loc[s.next].title} <span aria-hidden="true">&gt;</span></a><br/>"""
            or ""
        ),
        axis=1,
    )
)
metadata
7
0 outputs.
Out
[
25
]
40
executed
In
[
26
]
code
# Byline html placed at the start of every page header.  ``larger`` is the CSS
# relative-size keyword for font-size.
me = """<p><a href="https://github.com/tonyfast" rel="me" style="font-size: larger;"
aria-description="opens new tab" target="_blank">@tonyfast</a>s notebooks</p>"""
metadata
2
0 outputs.
Out
[
26
]
41
executed
In
[
27
]
code
# Header html for every page: the `me` byline followed by a collapsible site
# navigation listing every post in df, each linked relative to the page that
# carries the header.
# NOTE(review): the full list is rebuilt for each row, so the work grows with
# the square of the number of posts.
df = df . assign (
header = df . apply (
lambda s : me + "<details><summary>site navigation</summary><nav><ol> %s </ol></nav></details>" % "" . join (
F """<li><a href=" { relative_path ( s . target , t . target ) } "> { t . title } </a></li>"""
for i , t in df . iterrows ()
), axis = 1
))
metadata
7
0 outputs.
Out
[
27
]
42
executed
In
[
28
]
code
# Create each distinct parent directory of the targets (parents included, no
# error when it already exists).
await df . target . apath . parent . drop_duplicates () . apath . mkdir ( exist_ok = True , parents = True );
metadata
2
0 outputs.
Out
[
28
]
43
executed
In
[
29
]
code
# Export every notebook, passing its toc, footer and header as exporter
# resources; the coroutines produced by from_notebook_node are collected with
# `.gather()` (presumably a tonyfast.utils accessor -- verify) and awaited.
df [ "html" ] = await df [[ "nb" ]] . apply (
lambda s : from_notebook_node ( s [ "nb" ], dict ( toc = df . toc . loc [ s . name ], footer = df . loc [ s . name ] . footer , header = df . loc [ s . name ] . header )), axis = 1 ) . gather ()
metadata
2
0 outputs.
Out
[
29
]
44
executed
In
[
30
]
code
# With FILE set: create the target directories, then print and write every
# page to its target.  Otherwise show the first two pages inline.
# The leading `0 or` has no effect on the condition.
if 0 or FILE :
await df . target . apath . parent . drop_duplicates () . apath . mkdir ( exist_ok = True , parents = True );
# print() returns None, so `or` goes on to perform the write
await df . apply (
lambda s : print ( F """writing { s . target . as_uri () } """ ) or s . target . write_text ( str ( s . loc [ "html" ])), axis = 1
) . gather ()
else :
df . html . head ( 2 ) . display . iframe () . display ()
metadata
9
2 outputs.
Out
[
30
]
45
executed
In
[
31
]
code
# Group the posts by the "index.html" of their target directory.  Each group is
# turned into a <nav><ul> list of links to its posts, wrapped in a markdown
# cell, and then in a one-cell notebook stored in the "nb" column.
# `top_toc` is bound with := so the same list builder is available afterwards.
readmes = df . groupby ( df . target . apath . parent / "index.html" ) . apply (
( top_toc := lambda x : x . apply (
lambda y : F """<li><a href=" { relative_path ( y . target . parent / "index.html" , y . target ) } "> { y . loc [ "title" ] } </a></li>""" , axis = 1
) . pipe ( lambda df : "<nav><ul> {} </ul></nav>" . format ( "" . join ( df . values ))))
) . apply ( nbformat . v4 . new_markdown_cell ) . apply ( lambda x : nbformat . v4 . new_notebook ( cells = [ x ])) . to_frame ( "nb" )
metadata
5
0 outputs.
Out
[
31
]
46
executed
In
[
32
]
code
# Target of each index page: its path relative to Config.dir, placed under Config.target.
readmes = readmes . assign ( target = Config . target / readmes . index . apath . relative_to ( Config . dir ))
metadata
1
0 outputs.
Out
[
32
]
47
executed
In
[
33
]
code
# Create each distinct parent directory of the index pages (parents included,
# no error when it already exists).
await readmes . target . apath () . apath . parent . drop_duplicates () . apath . mkdir ( exist_ok = True , parents = True );
metadata
1
0 outputs.
Out
[
33
]
48
executed
In
[
34
]
code
# Export every per-directory index notebook with a second "a11y" exporter
# instance configured for index pages; `toc_exporter` is bound with := for
# reuse below.  `content` still has to be awaited.
content = readmes . nb . apply (
from_notebook_node , exporter = (
toc_exporter := nbconvert . get_exporter ( "a11y" )(
exclude_input_prompt = True ,
include_sa11y = False ,
exclude_output_prompt = True ,
exclude_anchor_links = True ,
include_toc = False ,
include_summary = False ,
table_pattern = "Region"
)
)
) . gather ()
# Site-wide index: one notebook holding the link list for all posts, keyed by
# Config.target / "index.html" and exported synchronously; element 0 of the
# exporter's result is kept.
primary = pandas . Series ([ nbformat . v4 . new_notebook ( cells = [ nbformat . v4 . new_markdown_cell ( df . pipe ( top_toc ))])], index = [ Config . target / "index.html" ]) . apply (
toc_exporter . from_notebook_node
) . apply ( list ) . series ()[ 0 ]
# With FILE set, print and write every index page; otherwise display them inline.
# The leading `0 or` has no effect on the condition.
if 0 or FILE :
( await content ) . to_frame ( "html" ) . apply (
lambda x : print ( F "write index { readmes . loc [ x . name ] . target } " ) or readmes . loc [ x . name ] . target . write_text ( str ( x . loc [ "html" ])), axis = 1
)
primary . to_frame ( "html" ) . apply (
lambda x : print ( F "write index { x . name } " ) or x . name . write_text ( str ( x . loc [ "html" ])), axis = 1
)
else :
( await content ) . display . iframe () . display ()
primary . display . iframe () . display ()
metadata
27
2 outputs.
Out
[
34
]
49
unexecuted
In
[
None
]
code
metadata
0
Out
[
None
]