getting started with tree sitter
installing tree sitter for python
def task_setup_tree_sitter():
    """Yield the doit sub-tasks that fetch and build the tree-sitter Python grammar.

    Two sub-task dictionaries are produced:

    * ``clone`` -- shallow-clones the ``tree-sitter-python`` repository into
      ``vendor/tree-sitter-python``; its ``clean`` action removes ``vendor``.
    * ``compile`` -- calls ``tree_sitter.Language.build_library`` to build
      ``build/my-languages.so`` from the cloned grammar; its ``clean`` action
      removes that library.
    """
    import pathlib

    import tree_sitter

    # The HEAD file of the clone is used as the marker that the clone exists.
    target = pathlib.Path("vendor/tree-sitter-python/.git/HEAD")

    def clone_exists():
        """Report whether the grammar repository has already been cloned."""
        return target.exists()

    yield dict(
        name="clone",
        actions=[
            "git clone https://github.com/tree-sitter/tree-sitter-python vendor/tree-sitter-python --depth 1"
        ],
        targets=[target],
        # A plain zero-argument function rather than the bound method
        # `target.exists`, so the check does not depend on how doit treats
        # bound methods of a Path object.
        # NOTE(review): confirm against the installed doit version.
        uptodate=[clone_exists],
        clean=["rm -rf vendor"],
    )
    yield dict(
        name="compile",
        actions=[
            (
                tree_sitter.Language.build_library,
                ("build/my-languages.so", ["vendor/tree-sitter-python"]),
            )
        ],
        file_dep=[target],
        targets=["build/my-languages.so"],
        clean=["rm build/my-languages.so"],
    )
if I := __name__ == "__main__":
%reload_ext doit
%doit setup_tree_sitter
loading a bunch of python code
we have some nice dataframes in a prior post that we'll use for demonstration
with __import__("importnb").Notebook(): from tonyfast.xxiii.__duckdb_search import *
# `I` is True when `__file__` is absent from the local namespace
# (presumably: when running interactively rather than as an imported file).
I = "__file__" not in locals()
if I:
    # Collect the cells found under the parent directory, then join each
    # cell's `source` value into a single string.
    cells = get_cells_frame("..")
    cells["source"] = cells["source"].apply("".join)
some of the cells might have pidgy
syntax so let's sort that out.
if I:
    import midgy

    # Mark every file in which at least one cell loads (or reloads) the pidgy
    # extension, and join that per-file flag back onto the cells as `pidgy`.
    # The pattern is a raw string with a non-capturing group: `\s` in a plain
    # string is an invalid escape sequence, and capture groups make
    # `str.contains` warn about match groups.
    cells = (
        cells.source.str.contains(r"%(?:re)?load_ext\s+pidgy")
        .groupby("file")
        .any()
        .rename("pidgy")
        .pipe(cells.join)
    )
    # Pass the source of the flagged rows through midgy's Python renderer
    # (presumably translating pidgy markup into plain Python -- confirm).
    cells.loc[cells.pidgy, "source"] = cells.loc[cells.pidgy, "source"].apply(midgy.Python().render)
tree sitting parser
if I:
    import tree_sitter

    # Create a parser and point it at the "python" grammar loaded from the
    # compiled shared library; `language` is kept as its own name.
    parser = tree_sitter.Parser()
    language = tree_sitter.Language("build/my-languages.so", "python")
    parser.set_language(language)
    display(parser)
tree sitting parsed
if I:
    # Encode each cell's source and hand it to the parser.
    sitter = cells.source.apply(lambda text: parser.parse(str.encode(text)))
    # Call `sexp()` on the root node of every parse result.
    sexp = sitter.apply(lambda tree: tree.root_node.sexp())
    display(sexp.to_frame("s-expression"))