Skip to content

getting started with tree sitter¤

installing tree sitter for python¤

    def task_setup_tree_sitter():
        """Yield the doit sub-tasks that fetch and build the tree-sitter python grammar.

        Two sub-tasks are produced:

        * ``clone`` shallow-clones tree-sitter-python into ``vendor/``; its
          target is the checkout's ``.git/HEAD`` file.
        * ``compile`` calls ``tree_sitter.Language.build_library`` to produce
          ``build/my-languages.so`` from that checkout.
        """
        # function-scope imports: resolved only when this generator is iterated
        import pathlib
        import tree_sitter

        target = pathlib.Path("vendor/tree-sitter-python/.git/HEAD")
        yield dict(
            name="clone",
            actions=[
                "git clone https://github.com/tree-sitter/tree-sitter-python vendor/tree-sitter-python --depth 1"
            ],
            targets=[target],
            # a callable check: the clone counts as up to date once HEAD exists
            uptodate=[target.exists],
            clean=["rm -rf vendor"],
        )
        yield dict(
            name="compile",
            actions=[
                (
                    tree_sitter.Language.build_library,
                    ('build/my-languages.so', ['vendor/tree-sitter-python']),
                )
            ],
            # depends on the file the clone task produces
            file_dep=[target],
            targets=["build/my-languages.so"],
            clean=["rm build/my-languages.so"],
        )
    # only when this runs as the main module: load doit's IPython extension
    # and run the setup_tree_sitter task through the %doit magic.
    # the walrus also binds the result of the check to `I`.
    if I := __name__ == "__main__":
        %reload_ext doit
        %doit setup_tree_sitter
-- setup_tree_sitter:clone
-- setup_tree_sitter:compile

loading a bunch of python code¤

we have some nice dataframes in a prior post that we'll use for demonstration

    # star-import the prior post's notebook inside importnb's Notebook context
    with __import__("importnb").Notebook():
        from tonyfast.xxiii.__duckdb_search import *
    # `I` records whether `__file__` is missing from the current namespace;
    # the cells frame is only built in that case.
    if I := ("__file__" not in locals()):
        cells = get_cells_frame("..")
        # every source entry is joined into a single string
        cells.source = cells.source.apply("".join)

some of the cells might have pidgy syntax, so let's sort that out.

    if I:
        import midgy
        # flag every cell of a file in which any cell loads the pidgy extension.
        # raw string + non-capturing group: `\s` stays a regex escape instead of an
        # invalid string escape, and pandas has no match groups to warn about.
        cells = (
            cells.source.str.contains(r"%(?:re)?load_ext\s+pidgy")
            .groupby("file")
            .any()
            .rename("pidgy")
            .pipe(cells.join)
        )
        # pass the source of the flagged cells through midgy's Python renderer
        cells.loc[cells[cells.pidgy].index, "source"] = cells[cells.pidgy].source.apply(midgy.Python().render)
/tmp/ipykernel_994317/2050936253.py:3: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.
  cells = cells.source.str.contains("%(re)?load_ext\s+(pidgy)").groupby("file").any().rename("pidgy").pipe(cells.join)

tree sitting parser¤

    if I:
        import tree_sitter
        # create the parser first, then load the python grammar from the built library
        parser = tree_sitter.Parser()
        language = tree_sitter.Language("build/my-languages.so", "python")
        parser.set_language(language)
        display(parser)
<tree_sitter.Parser at 0x7f5d2b29cc70>

tree sitting parsed¤

    if I:
        # encode each cell's source and hand the bytes to the parser
        sitter = cells.source.apply(lambda text: parser.parse(str.encode(text)))
        # keep the s-expression of every tree's root node
        sexp = sitter.apply(lambda tree: tree.root_node.sexp())
        display(sexp.to_frame("s-expression"))
s-expression
file cell_ct
../regexs.ipynb 0 (module (comment))
1 (module (import_statement name: (dotted_name (...
2 (module (comment) (expression_statement (compa...
3 (module (expression_statement (assignment left...
4 (module)
... ... ...
../xxii/2022-12-23-mkdocs-plugin.ipynb 8 (module (ERROR (identifier) (identifier) (stri...
9 (module (expression_statement (augmented_assig...
../xxiii/vendor/tree-sitter-python/README.md 0 (module (expression_statement (binary_operator...
../xxiii/vendor/tree-sitter-python/bindings/rust/README.md 0 (module (comment) (ERROR (identifier) (identif...
../README.md 0 (module (ERROR (UNEXPECTED '-')) (expression_s...

866 rows × 1 columns