Extracting a dependency graph from importlib_metadata
%reload_ext pidgy
import functools

import importlib_metadata, pandas, networkx
from toolz.curried import *
%reload_ext pidgy
import importlib_metadata, pandas, networkx
from toolz.curried import *
@functools.lru_cache  # cache this because the result will always be the same and parsing can be costly
def get_tidy_dist() -> pandas.DataFrame:
    """Create a tidy dataframe of the required distributions in this environment.

    Every ``Requires-Dist`` value reported by ``get_dists()`` is parsed with
    ``pkg_resources.parse_requirements``; the attributes (``vars``) of the
    first requirement parsed from each value become one row of the result.

    Returns:
        A frame with one row per ``Requires-Dist`` entry, carrying the same
        index as ``get_dists().loc["Requires-Dist"]``.  Because of the cache
        the very same object is returned on every call, so copy it before
        mutating it.
    """
    # Imported here because no module-level import of ``pkg_resources`` is
    # visible in this file; without it the first call raises NameError.
    import pkg_resources

    def parse(spec):
        # Only the first requirement parsed from the string is used.
        requirement = next(iter(pkg_resources.parse_requirements(spec)))
        return pandas.Series(vars(requirement))

    return get_dists().loc["Requires-Dist"].apply(parse)
@functools.lru_cache  # cache this because the result will always be the same and parsing can be costly
def get_tidy_dist() -> pandas.DataFrame:
    """Create a tidy dataframe of the required distributions in this environment.

    Every ``Requires-Dist`` value reported by ``get_dists()`` is parsed with
    ``pkg_resources.parse_requirements``; the attributes (``vars``) of the
    first requirement parsed from each value become one row of the result.

    Returns:
        A frame with one row per ``Requires-Dist`` entry, carrying the same
        index as ``get_dists().loc["Requires-Dist"]``.  Because of the cache
        the very same object is returned on every call, so copy it before
        mutating it.
    """
    # Imported here because no module-level import of ``pkg_resources`` is
    # visible in this file; without it the first call raises NameError.
    import pkg_resources

    def parse(spec):
        # Only the first requirement parsed from the string is used.
        requirement = next(iter(pkg_resources.parse_requirements(spec)))
        return pandas.Series(vars(requirement))

    return get_dists().loc["Requires-Dist"].apply(parse)
def get_dists(distributions=None) -> pandas.Series:
    """Collect the metadata headers of the known distributions.

    Args:
        distributions: Optional iterable of distribution-like objects that
            expose ``name`` and ``metadata`` (an object with ``items()``
            yielding ``(key, value)`` header pairs).  Defaults to
            ``importlib_metadata.distributions()``.

    Returns:
        A series named ``"value"`` with one header value per row, indexed by
        a ``("key", "project")`` MultiIndex.  When several distributions
        share a name only the last one seen is kept.  An empty input gives
        an empty series with the same index layout.
    """
    if distributions is None:
        distributions = importlib_metadata.distributions()

    # Keyed by name so that a repeated project name keeps only its last
    # occurrence; ``items()`` is the public accessor for the header pairs.
    headers_by_project = {
        dist.name: dist.metadata.items() for dist in distributions
    }

    rows = [
        (key, project, value)
        for project, headers in headers_by_project.items()
        for key, value in headers
    ]

    # Building the frame from flat rows avoids one Series per header and
    # still yields a well-formed (empty) result when there are no rows.
    frame = pandas.DataFrame(rows, columns=["key", "project", "value"])
    return frame.set_index(["key", "project"])["value"]
def get_dists(distributions=None) -> pandas.Series:
    """Collect the metadata headers of the known distributions.

    Args:
        distributions: Optional iterable of distribution-like objects that
            expose ``name`` and ``metadata`` (an object with ``items()``
            yielding ``(key, value)`` header pairs).  Defaults to
            ``importlib_metadata.distributions()``.

    Returns:
        A series named ``"value"`` with one header value per row, indexed by
        a ``("key", "project")`` MultiIndex.  When several distributions
        share a name only the last one seen is kept.  An empty input gives
        an empty series with the same index layout.
    """
    if distributions is None:
        distributions = importlib_metadata.distributions()

    # Keyed by name so that a repeated project name keeps only its last
    # occurrence; ``items()`` is the public accessor for the header pairs.
    headers_by_project = {
        dist.name: dist.metadata.items() for dist in distributions
    }

    rows = [
        (key, project, value)
        for project, headers in headers_by_project.items()
        for key, value in headers
    ]

    # Building the frame from flat rows avoids one Series per header and
    # still yields a well-formed (empty) result when there are no rows.
    frame = pandas.DataFrame(rows, columns=["key", "project", "value"])
    return frame.set_index(["key", "project"])["value"]
applying the functions
generate the pandas.DataFrame
of the dependency graph and metadata
# Bind the result of get_tidy_dist() to `df` and, in the same expression, take
# its head() and attach a caption through the Styler so the notebook shows a
# captioned preview.  The statement appears twice; the second evaluation
# repeats the first.
(df := get_tidy_dist()).head().style.set_caption("the metadata associated with my known python dependecies")
(df := get_tidy_dist()).head().style.set_caption("the metadata associated with my known python dependecies")
cast the tidy data as a networkx
graph
# Build the graph `G` from `df`: the index is moved back into columns, then
# every row becomes an edge from its "project" column to its "name" column.
# The assignment appears twice; the second one rebinds `G` to an equal graph.
G =df.reset_index().pipe(networkx.from_pandas_edgelist, source="project", target="name")
G =df.reset_index().pipe(networkx.from_pandas_edgelist, source="project", target="name")
# Count the occurrences of each value in the "name" column, keep the first 20
# entries of that count, and show them transposed (one "count" row) with a
# caption.  Also repeated twice.
df.name.value_counts().to_frame("count").head(20).T.style.set_caption("a table counting the frequency of specific distributions.")
df.name.value_counts().to_frame("count").head(20).T.style.set_caption("a table counting the frequency of specific distributions.")
draw the graph in matplotlib
# `matplotlib.pyplot` is used below but is not brought into scope by the
# import lines visible in this file, so import it here.
import matplotlib.pyplot

# Resize the current figure to 20x20 inches, then draw the graph `G`.
matplotlib.pyplot.gcf().set_size_inches((20, 20))
networkx.draw_networkx(G)
# Same two statements again, as in the original.
matplotlib.pyplot.gcf().set_size_inches((20, 20))
networkx.draw_networkx(G)