decline in OSS participation
Participation in the JupyterLab frontends community has declined, and the data below demonstrates the attrition of both people and organizations.
issues = pipe(
# mapping of year to github issue
"""2024 229
2023 205
2023 170
2022 152
2022 135
2021 128
2021 117""", str.splitlines,
map(str.split), map(reversed), dict, Series
).to_frame("year").rename_axis("issue")
issues = issues.set_index(
issues.index.map(
"https://api.github.com/repos/jupyterlab/frontends-team-compass/issues/{}/comments?per_page=100".format
)
)
issues = (await issues.index.http.get()).explode().series().join(issues).set_index("id")
transform the issue comment bodies to HTML
# Render every comment body to HTML, parse the HTML with BeautifulSoup, and
# take the first HTML table of each comment as its attendee list.
issues = issues.assign(html=issues.body.apply(shell.tangle.parser.parser.render))
issues = issues.assign(soup=issues.html.apply(bs4.BeautifulSoup, features="lxml"))
# The HTML is wrapped in StringIO before being handed to pandas.read_html.
# `excepts` (presumably toolz's) returns None instead of raising, so a comment
# whose HTML cannot be parsed into tables ends up with a None attendee list;
# `get(0)` then picks the first table, or yields None again when there is
# nothing to index.  Exception is caught rather than BaseException so that
# KeyboardInterrupt and SystemExit still stop the cell.
# NOTE(review): assumes the first table in a comment is the attendee table.
issues = issues.assign(
    attendees=issues.html.apply(io.StringIO)
    .apply(excepts(Exception, pandas.read_html))
    .apply(excepts(Exception, get(0)))
)
normalize the meeting dates so they can be parsed into datetime values
def _month_first(text):
    """Reorder "<day> <month> [<year>]" into "<month> <day> [<year>]".

    Only the first three whitespace-separated tokens are kept.  Text with
    fewer than three tokens is reordered as far as possible instead of
    raising, so a day-first heading without a year survives this step.
    """
    tokens = text.split()
    return " ".join(tokens[1:2] + tokens[:1] + tokens[2:3])


# The date text is the first child of the first h1/h2/h3 in each comment.
# Comments without such a heading are dropped by `dropna` and therefore get a
# missing date when the result is aligned back onto `issues`.
issues = issues.assign(
    date=issues.soup.methodcaller("select_one", "h1,h2,h3")
    .dropna()
    .attrgetter("contents")
    .itemgetter(0)
    .apply("".join)
)
# Keep only the text in front of the first "(".
issues.date = issues.date.str.split("(", expand=True)[0]
# Strip the title prefixes that precede the date in the headings.
issues.date = (
    issues.date
    .str.removeprefix("JupyterLab Weekly Call, ")
    .str.removeprefix("JupyterLab Weekly Call - ")
    .str.removeprefix("Weekly Meeting")
    .str.removeprefix("Wednesday, ")
)
# Headings that begin with a digit are day-first; rewrite them month-first so
# every row has the same token order.  `na=False` yields a plain boolean mask
# in which rows without a date are False.
number_first = issues.date.str.lstrip().str.startswith(tuple(string.digits), na=False)
issues.loc[number_first, "date"] = issues.loc[number_first, "date"].apply(_month_first)
# One column per whitespace-separated token.  `.series` is a project accessor
# defined outside this section, presumably expanding each token list into
# columns 0, 1, 2, ... -- TODO confirm.
dates = issues.date.str.split().dropna().series(1)
# Column 1 is expected to hold the day: strip a trailing comma and any
# trailing letters (e.g. an ordinal suffix).
dates[1] = dates[1].str.rstrip(","+string.ascii_letters)
noyear = dates[dates[2].isna()].index
# NOTE(review): the year column is replaced with the year of `created_at` for
# EVERY row, not only for the `noyear` rows computed above, so a year written
# in the heading is ignored and `noyear` is not used in this section.
# Confirm that overriding the heading's year is intended.
dates[2] = issues.created_at.pipe(pandas.to_datetime).attrgetter("year").astype(str)
# Re-join the tokens (missing tokens become empty strings) and parse them.
issues.loc[dates.index, "date"] = dates.fillna("").apply(" ".join, axis=1)
issues.date = issues.date.pipe(pandas.to_datetime, format="mixed")
total quarterly attendees
# Stack the attendee tables of all comments (keyed by comment id), attach each
# comment's meeting date via the `id` index level, and keep the normalised
# affiliation (lower-cased, leading "@" removed) of every row that has a Name.
# The result is a Series of affiliations indexed by meeting date.
affiliation = (
    issues[["date"]]
    .join(pandas.concat(issues.attendees.to_dict()).rename_axis(("id", None), axis=0))
    .set_index("date")
    .dropna(subset="Name")
    .Affiliation.str.lower()
    .str.lstrip("@")
)
# Number of non-null affiliations per three-month bin; the first bin is
# dropped (presumably because it is incomplete -- TODO confirm).
# NOTE(review): `count()` skips attendees whose Affiliation cell is empty, so
# this is a lower bound on attendance; confirm that is the intended measure.
quarterly = affiliation.groupby(pandas.Grouper(freq="3ME")).count().iloc[1:]
# The x axis is the date index, so the attendee-count label goes on the y axis.
quarterly.plot(ylabel="quarterly attendees", grid=True)
top organization attendance
# Attendance per organization in six-month bins: one row per bin, one column
# per affiliation, each cell the number of attendee rows with that affiliation.
orgs = (
    affiliation
    .groupby(pandas.Grouper(freq="6ME"))
    .apply(Series.value_counts)
    .unstack()
)
# The 15 organizations with the highest overall totals, largest first.
top = orgs.sum().sort_values().tail(15).iloc[::-1]
# Keep only those organizations, treat "no attendees" as 0, and drop the
# first six-month bin.
orgs = orgs[top.index].fillna(0).iloc[1:]
orgs