Skip to content

Commit

Permalink
Introduce python-sdk for code2prompt (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
mufeedvh committed Jan 15, 2025
1 parent fa72ecd commit 612712c
Show file tree
Hide file tree
Showing 13 changed files with 864 additions and 2 deletions.
172 changes: 172 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,175 @@ $RECYCLE.BIN/

# Windows shortcuts
*.lnk

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, to inject the date and other information into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# PyPI configuration file
.pypirc
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ test = false
bench = false
path = "src/main.rs"

# Library target for the crate; it is built in two forms (see `crate-type`).
[lib]
name = "code2prompt"
# "cdylib" produces the shared library used as the Python extension module
# (see the pyo3 dependency below); "rlib" keeps the crate linkable from Rust.
# NOTE(review): presumably the binary at src/main.rs links the rlib — confirm.
crate-type = ["cdylib", "rlib"]

[dependencies]
clap = { version = "4.0", features = ["derive"] }
handlebars = "4.3"
Expand All @@ -38,6 +42,7 @@ once_cell = "1.19.0"
log = "0.4"
env_logger = "0.11.3"
arboard = "3.4.0"
# Python bindings. `abi3-py312` builds against the stable ABI with Python 3.12
# as the minimum version, matching `requires-python = ">=3.12"` in pyproject.toml.
# NOTE(review): `extension-module` is also enabled by maturin through
# `[tool.maturin] features` in pyproject.toml. Enabling it unconditionally here
# may cause linker errors for `cargo test` and other non-extension targets —
# consider leaving it to maturin or gating it behind a crate feature; confirm.
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py312", "generate-import-lib"] }

[profile.release]
lto = "thin"
Expand Down
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- [Templates](#templates)
- [User Defined Variables](#user-defined-variables)
- [Tokenizers](#tokenizers)
- [Python SDK](#python-sdk)
- [Contribution](#contribution)
- [License](#license)
- [Support The Author](#support-the-author)
Expand All @@ -27,8 +28,9 @@ You can run this tool on the entire directory and it would generate a well-forma

- Quickly generate LLM prompts from codebases of any size.
- Customize prompt generation with Handlebars templates. (See the [default template](src/default_template.hbs))
- Respects `.gitignore`.
- Respects `.gitignore` (can be disabled with `--no-ignore`).
- Filter and exclude files using glob patterns.
- Control hidden file inclusion with `--hidden` flag.
- Display the token count of the generated prompt. (See [Tokenizers](#tokenizers) for more details)
- Optionally include Git diff output (staged files) in the generated prompt.
- Automatically copy the generated prompt to the clipboard.
Expand Down Expand Up @@ -138,7 +140,6 @@ Save the generated prompt to an output file:
```sh
code2prompt path/to/codebase --output=output.txt
```

Print output as JSON:

```sh
Expand Down Expand Up @@ -181,6 +182,18 @@ Disable wrapping code inside markdown code blocks:
code2prompt path/to/codebase --no-codeblock
```

Include hidden files and directories:

```sh
code2prompt path/to/codebase --hidden
```

Skip .gitignore rules:

```sh
code2prompt path/to/codebase --no-ignore
```

- Rewrite the code to another language.
- Find bugs/security vulnerabilities.
- Document the code.
Expand Down Expand Up @@ -254,6 +267,21 @@ Tokenization is implemented using [`tiktoken-rs`](https://github.com/zurawiki/ti

For more context on the different tokenizers, see the [OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/66b988407d8d13cad5060a881dc8c892141f2d5c/examples/How_to_count_tokens_with_tiktoken.ipynb)

## Python SDK

code2prompt also provides Python bindings for seamless integration into Python applications. The Python SDK offers all the functionality of the CLI tool through an intuitive object-oriented interface.

See [python-sdk/README.md](python-sdk/README.md) for detailed documentation and usage examples.

Example usage:

```python
from code2prompt import CodePrompt

prompt = CodePrompt("./my_project", include_patterns=["*.py"])
result = prompt.generate(encoding="cl100k")
print(result["prompt"])
```

## How is it useful?

`code2prompt` makes it easy to generate prompts for LLMs from your codebase. It traverses the directory, builds a tree structure, and collects information about each file. You can customize the prompt generation using Handlebars templates. The generated prompt is automatically copied to your clipboard and can also be saved to an output file. `code2prompt` helps streamline the process of creating LLM prompts for code analysis, generation, and other tasks.
Expand Down
37 changes: 37 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Packaging metadata for the code2prompt Python bindings, built with maturin.
[build-system]
requires = ["maturin>=1.4,<2.0"]
build-backend = "maturin"

[project]
name = "code2prompt"
version = "2.0.0"
description = "Python bindings for code2prompt - A tool to generate LLM prompts from codebases"
authors = [
    { name = "Mufeed VH", email = "[email protected]" },
]
readme = "README.md"
# Keep in sync with the pyo3 `abi3-py312` feature in Cargo.toml: the extension
# targets the stable ABI with Python 3.12 as the minimum version.
requires-python = ">=3.12"
# List only Python versions permitted by `requires-python`; advertising older
# versions would promise support that installers will refuse to honor.
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Rust",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

[project.urls]
Homepage = "https://github.com/mufeedvh/code2prompt"
Documentation = "https://github.com/mufeedvh/code2prompt"
Repository = "https://github.com/mufeedvh/code2prompt"

[tool.maturin]
# Directory holding the pure-Python part of the package.
python-source = "python-sdk"
# Cargo features turned on for maturin builds of the extension module.
features = ["pyo3/extension-module"]
# Dotted import path of the compiled module: it is placed inside the
# `code2prompt` package rather than at the top level.
module-name = "code2prompt.code2prompt"
Loading

0 comments on commit 612712c

Please sign in to comment.