Skip to content

decline in oss participation¤

the decline of participation in the jupyterlab front ends community. there is data to demonstrate the attrition of people and organizations.

issues = pipe(
    # mapping of year to github issue
    """2024 229
2023 205
2023 170
2022 152
2022 135
2021 128
2021 117""", str.splitlines, 
    map(str.split), map(reversed), dict, Series
    ).to_frame("year").rename_axis("issue")
issues = issues.set_index(
    issues.index.map(
        "https://api.github.com/repos/jupyterlab/frontends-team-compass/issues/{}/comments?per_page=100".format
    )
)
issues = (await issues.index.http.get()).explode().series().join(issues).set_index("id")

transform issues bodies to html

issues = issues.assign(html=issues.body.apply(shell.tangle.parser.parser.render))
issues = issues.assign(soup=issues.html.apply(bs4.BeautifulSoup, features="lxml"))
issues = issues.assign(attendees=issues.html.apply(io.StringIO).apply(
    excepts(BaseException, pandas.read_html)
).apply(excepts(BaseException, get(0))))

do work on the dates so we can get them as numerical values

issues = issues.assign(
    date=issues.soup.methodcaller("select_one", "h1,h2,h3").dropna().attrgetter("contents").itemgetter(0).apply("".join)
)
issues.date = issues.date.str.split("(", expand=True)[0]
issues.date = (
    issues.date
    .str.removeprefix("JupyterLab Weekly Call, ")
    .str.removeprefix("JupyterLab Weekly Call - ")
    .str.removeprefix("Weekly Meeting")
    .str.removeprefix("Wednesday, ")
)

number_first =issues.date.str.lstrip().str.startswith(tuple(string.digits)).fillna(False)
issues.loc[number_first, "date"] = issues[number_first].apply(
    excepts(BaseException, lambda x: operator.itemgetter(1, 0, 2)(x.date.split())), axis=1
).apply(" ".join)

dates = issues.date.str.split().dropna().series(1)
dates[1] = dates[1].str.rstrip(","+string.ascii_letters)
noyear = dates[dates[2].isna()].index
dates[2] = issues.created_at.pipe(pandas.to_datetime).attrgetter("year").astype(str)
issues.loc[dates.index, "date"] = dates.fillna("").apply(" ".join, axis=1)
issues.date = issues.date.pipe(pandas.to_datetime, format="mixed")
/tmp/ipykernel_97782/3917024939.py:13: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  number_first =issues.date.str.lstrip().str.startswith(tuple(string.digits)).fillna(False)
/home/tbone/Documents/nobook/src/nobook/utils.py:259: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
  return self.object.apply(pandas.Series, *args, **kwargs)

total quarterly attendees¤

affiliation = issues[["date"]].join(
    pandas.concat(issues.attendees.to_dict()).rename_axis(("id", None), axis=0)
).set_index("date").dropna(subset="Name").Affiliation.str.lower().str.lstrip("@")

quarterly = affiliation.groupby(pandas.Grouper(freq="3ME")).count().iloc[1:]
quarterly.plot(xlabel="quarterly attendees", grid=True)
<Axes: xlabel='quarterly attendees'>
No description has been provided for this image

top organization attendences¤

orgs = affiliation.groupby(pandas.Grouper(freq="6ME")).apply(Series.value_counts).unstack()
top = orgs.sum().sort_values().iloc[::-1].iloc[:15]
orgs = orgs.loc[:, top.index].fillna(0).iloc[1:]
orgs 
quantstack aws ibm quansight labs quansight bloomberg anaconda google databricks jp morgan chase netflix cocalc engagelively apple oxford
date
2022-07-31 77.0 45.0 66.0 30.0 13.0 16.0 10.0 24.0 19.0 15.0 14.0 12.0 6.0 4.0 9.0
2023-01-31 89.0 60.0 37.0 32.0 16.0 22.0 7.0 9.0 13.0 15.0 13.0 0.0 6.0 1.0 0.0
2023-07-31 86.0 58.0 12.0 29.0 27.0 20.0 14.0 2.0 10.0 8.0 7.0 1.0 2.0 1.0 0.0
2024-01-31 58.0 44.0 1.0 31.0 30.0 16.0 18.0 1.0 4.0 3.0 7.0 12.0 0.0 2.0 0.0
2024-07-31 43.0 36.0 1.0 19.0 16.0 15.0 26.0 0.0 0.0 0.0 2.0 6.0 0.0 2.0 0.0