speed running a dataframe to table
this is a pure dataframe approach to constructing html representations
- the shorter the code distance, the easier it is to reason
- setup your accessors before solving your problem
- what libraries will you need to use in dataframe chaining?
- do they improve your ability to reason?
- encapsulate the complexity
from nbconvert_a11y.table import new
# demo data: a 100 x 100 frame of random samples
df = DataFrame(numpy.random.randn(100, 100))
# number of leading and trailing rows/columns of the frame to show in the table
visible = 3
# positional ranges covering the first and the last `visible` columns
cranges = [
    Index(range(start, stop))
    for start, stop in ((0, visible), (len(df.columns) - visible, len(df.columns)))
]
# positional ranges covering the first and the last `visible` rows
rranges = [
    Index(range(start, stop))
    for start, stop in ((0, visible), (len(df) - visible, len(df)))
]
the table representation establishes the concept of row and columns along with their associated indexes.
our transformation from a dataframe is translating the nominal tidy data columns and indexes
to ordinal table row and column indexes.
we do this in a few groups: 1. the `thead` region, 2. the `tbody` region, 3. the `tfoot` region.
our primary library for building the table is beautiful soup. spending time building out custom accessors for the dataframe objects will prove beneficial when building our final table.
@pandas.api.extensions.register_dataframe_accessor("bs4")
@pandas.api.extensions.register_series_accessor("bs4")
@pandas.api.extensions.register_index_accessor("bs4")
@dataclass
class _BeautifulSoup:
    """``bs4`` accessor that builds Beautiful Soup tags from pandas objects.

    The class is registered as the ``bs4`` accessor on DataFrame, Series and
    Index. ``element`` wraps every value in its own tag, while ``elements``
    gathers the values of a row, column or Series as children of one tag.
    Shortcut methods named after html tags are generated at the bottom of
    the class body.
    """

    # shared soup used as the tag factory; it has no annotation, so the
    # dataclass machinery does not treat it as a field
    parent = bs4.BeautifulSoup(features="lxml")
    # the pandas object the accessor is attached to
    object: DataFrame | Series | Index
    # optional callable applied to every value before it is placed in a tag
    cast: typing.Callable = None
    # names of grouping tags (not referenced in the visible code)
    GROUPS = "thead", "tbody", "table", "tr", "tfoot"

    @classmethod
    def new(cls, name, *children, **attrs):
        """Create a ``name`` tag carrying ``attrs`` and append ``children`` to it."""
        tag = cls.parent.new_tag(name=name, attrs=attrs)
        tag.extend(children)
        return tag

    def _cast(self, value):
        """Apply the optional ``cast`` hook; keep tags, stringify anything else."""
        if self.cast is not None:
            value = self.cast(value)
        return value if isinstance(value, bs4.Tag) else str(value)

    def element(self, name, **attrs):
        """Wrap each individual value of the pandas object in its own ``name`` tag.

        An Index is converted to a frame first. Raises ``TypeError`` for any
        other kind of object.
        """
        data = self.object
        if isinstance(data, Index):
            data = data.to_frame()
        wrap = partial(self.new, name, **attrs)
        if isinstance(data, DataFrame):
            return data.map(self._cast).map(wrap)
        if isinstance(data, Series):
            return data.apply(self._cast).apply(wrap)
        raise TypeError(F"cant create element for type of {type(self.object)}")

    def elements(self, name, axis=1, **attrs):
        """Collect values as the children of ``name`` tags.

        For a frame (or an Index converted to one) a tag is built per row or
        column depending on ``axis``; for a Series a single tag holding all of
        its values is returned. Raises ``TypeError`` for any other object.
        """
        data = self.object
        if isinstance(data, Index):
            data = data.to_frame()
        if isinstance(data, DataFrame):

            def group(cells):
                return self.new(name, *cells, **attrs)

            return data.map(self._cast).apply(group, axis=axis)
        if isinstance(data, Series):
            return self.new(name, *data.apply(self._cast), **attrs)
        raise TypeError(F"cant create elements for type of {type(self.object)}")

    # generate one shortcut method per html tag: grouping tags collect their
    # children with `elements`, cell-like tags wrap single values with `element`
    for tag in "table tbody thead tfoot tr dl ul ol".split():
        locals()[tag] = functools.partialmethod(elements, tag)
    for tag in "th td li dt dd".split():
        locals()[tag] = functools.partialmethod(element, tag)
    # drop the loop variable so it does not linger as a class attribute
    del tag
%%
after setting up our beautiful soup jigs we can now rip through a dataframe that consolidates to an html table.
1. initialize the thead with the names of the indexes ~~or columns~~
# the header row starts with one cell per index level name
colgroups = [DataFrame([[*df.index.names]]).bs4.th(scope="col")]
for crange in cranges:
    # add the column names to the thead. the column labels are shifted by the
    # number of index levels, the same offset the tbody and tfoot cells use, so
    # the header cells do not reuse the labels of the index-name column.
    colgroups.append(
        DataFrame(
            df.columns[crange].to_frame().T.values, None, crange + df.index.nlevels
        ).bs4.th(scope="col")
    )
# initialize the rowgroups with the computed thead
rowgroups = [pandas.concat(colgroups, axis=1).bs4.tr().bs4.thead()]
for rrange in rranges:
    # iterate through the row ranges, initializing the column groups with the
    # row header cells taken from the index values
    colgroups = [
        DataFrame(
            data=df.index[rrange].values,
            index=rrange + df.columns.nlevels,
        ).bs4.th(scope="row")
    ]
    # map the dataframe values to the ordinal row and column positions of the
    # table, one group of td cells per column range
    colgroups.extend(
        DataFrame(
            data=df.iloc[rrange, crange].values,
            index=rrange + df.columns.nlevels,
            columns=crange + df.index.nlevels,
        ).bs4.td()
        for crange in cranges
    )
    # each row range becomes its own tbody
    rowgroups.append(pandas.concat(colgroups, axis=1).bs4.tr().bs4.tbody())
1. aggregate the statistics of the footer
colgroups = []
# ordinal positions of the two summary rows: they follow the header rows and
# every row of the frame
index = pandas.RangeIndex(2) + (len(df) + df.columns.nlevels)
for crange in cranges:
    # the min and max, the extent, enable us to do lots of cool stuff with plotting
    stats = df.iloc[:, crange].agg(["min", "max"])
    if not colgroups:
        # write the summary index names once, ahead of the first group of values
        colgroups.append(
            DataFrame(data=stats.index.values, index=index).bs4.th(scope="row")
        )
    # write the summary values
    colgroups.append(
        DataFrame(
            data=stats.values,
            index=index,
            columns=df.index.nlevels + crange,
        ).bs4.td()
    )
# consolidate the footer
rowgroups.append(pandas.concat(colgroups, axis=1).bs4.tr().bs4.tfoot())
1. post process and synthesize the complete table
# wrap the thead, tbody and tfoot groups collected in rowgroups in a single
# table tag carrying id="demo"
table = Series(rowgroups).bs4.table(id="demo")
# place the caption at position 0 of the table's children
table.insert(0, new("caption", "quickest table draw in the pacific northwest"))
{{table}}