skip to main content

@tonyfast's notebooks

site navigation
notebook summary
title
getting started with tree sitter
description
we have some nice dataframes in a prior post that we'll use for demonstration
cells
14 total
7 code
state
executed in order
kernel
Python [conda env:root] *
language
python
name
conda-root-py
lines of code
41
outputs
4
table of contents
{"kernelspec": {"display_name": "Python [conda env:root] *", "language": "python", "name": "conda-root-py"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13"}, "widgets": {"application/vnd.jupyter.widget-state+json": {"state": {}, "version_major": 2, "version_minor": 0}}, "title": "getting start with tree sitter", "description": "we have some nice dataframes in a prior post that we'll use for demonstration"}
notebook toolbar
Activate
cell ordering
1

getting started with tree sitter

2

installing tree sitter for python

3
    def task_setup_tree_sitter():
        """Yield the doit sub-tasks that fetch and build the python grammar.

        Two task dictionaries are produced, in order:

        * ``clone`` -- shallow-clones the tree-sitter-python repository into
          ``vendor/tree-sitter-python``.
        * ``compile`` -- calls ``tree_sitter.Language.build_library`` to build
          ``build/my-languages.so`` from that checkout.
        """
        import tree_sitter
        import pathlib
        import shutil

        # the HEAD file of the checkout doubles as the marker that the clone exists
        target = pathlib.Path("vendor/tree-sitter-python/.git/HEAD")

        clone_task = {
            "name": "clone",
            "actions": [
                "git clone https://github.com/tree-sitter/tree-sitter-python vendor/tree-sitter-python --depth 1"
            ],
            "targets": [target],
            # skip the clone whenever the marker file is already on disk
            "uptodate": [target.exists],
            "clean": ["rm -rf vendor"],
        }
        yield clone_task

        # a (callable, args) pair; the arguments are the output library and the grammar directories
        build_action = (
            tree_sitter.Language.build_library,
            ('build/my-languages.so', ['vendor/tree-sitter-python']),
        )
        compile_task = {
            "name": "compile",
            "actions": [build_action],
            "file_dep": [target],
            "targets": ["build/my-languages.so"],
            "clean": ["rm build/my-languages.so"],
        }
        yield compile_task
4
    # I is bound to the result of the comparison (True when __name__ is
    # "__main__"), so the flag is still available after the condition is tested.
    if I := __name__ == "__main__":
        # comments stay on their own lines: trailing text on a magic line would
        # be passed to the magic as part of its argument string.
        # (re)load the IPython extension named doit ...
        %reload_ext doit
        # ... then invoke its %doit magic with the setup_tree_sitter task name
        %doit setup_tree_sitter
1 outputs.
-- setup_tree_sitter:clone
-- setup_tree_sitter:compile

5

loading a bunch of python code

6

we have some nice dataframes in a prior post that we'll use for demonstration

7
    # the star import runs inside importnb's Notebook() context manager
    # NOTE(review): presumably this is what makes the notebook-backed module importable -- confirm
    with __import__("importnb").Notebook():
        from tonyfast.xxiii.__duckdb_search import *
8
    # I is True only when no __file__ name is defined in this scope
    I = "__file__" not in locals()
    if I:
        # collect the cells found under the parent directory
        cells = get_cells_frame("..")
        # join the pieces of each cell's source into a single string
        cells.source = cells.source.apply("".join)
9

some of the cells might have pidgy syntax, so let's sort that out.

10
    if I:
        import midgy

        # Flag every file in which at least one cell loads the pidgy extension
        # (%load_ext pidgy or %reload_ext pidgy), then join that per-file flag
        # back onto the cells as a "pidgy" column.
        # The pattern is a raw string so \s is a regex escape rather than an
        # invalid string escape, and the groups are non-capturing/removed so
        # pandas no longer emits its "has match groups" UserWarning; the set of
        # matching sources is unchanged.
        cells = (
            cells.source.str.contains(r"%(?:re)?load_ext\s+pidgy")
            .groupby("file")
            .any()
            .rename("pidgy")
            .pipe(cells.join)
        )
        # replace the source of every cell in a flagged file with midgy's python rendering
        cells.loc[cells[cells.pidgy].index, "source"] = cells[cells.pidgy].source.apply(midgy.Python().render)
1 outputs.
/tmp/ipykernel_994317/2050936253.py:3: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.
  cells = cells.source.str.contains("%(re)?load_ext\s+(pidgy)").groupby("file").any().rename("pidgy").pipe(cells.join)

11

tree sitting parser

12
    if I:
        import tree_sitter

        parser = tree_sitter.Parser()
        # load the "python" grammar from the shared library at build/my-languages.so
        language = tree_sitter.Language("build/my-languages.so", "python")
        parser.set_language(language)
        display(parser)
1 outputs.
<tree_sitter.Parser at 0x7f5d2b29cc70>
13

tree sitting parsed

14
    if I:
        # encode each cell's source to bytes and parse it with the configured parser
        sitter = cells.source.apply(lambda text: parser.parse(str.encode(text)))
        # take the s-expression of each parse result's root node
        sexp = sitter.apply(lambda tree: tree.root_node.sexp())
        display(sexp.to_frame("s-expression"))
1 outputs.
s-expression
file cell_ct
../regexs.ipynb 0 (module (comment))
1 (module (import_statement name: (dotted_name (...
2 (module (comment) (expression_statement (compa...
3 (module (expression_statement (assignment left...
4 (module)
... ... ...
../xxii/2022-12-23-mkdocs-plugin.ipynb 8 (module (ERROR (identifier) (identifier) (stri...
9 (module (expression_statement (augmented_assig...
../xxiii/vendor/tree-sitter-python/README.md 0 (module (expression_statement (binary_operator...
../xxiii/vendor/tree-sitter-python/bindings/rust/README.md 0 (module (comment) (ERROR (identifier) (identif...
../README.md 0 (module (ERROR (UNEXPECTED '-')) (expression_s...

866 rows × 1 columns