Skip to content

Commit

Permalink
Merge pull request #20 from epinzur/update_build
Browse files Browse the repository at this point in the history
updated build stuff
  • Loading branch information
epinzur authored Jun 21, 2024
2 parents 9beb491 + e1d5a78 commit 8be1644
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ jobs:
run: |
# Print the project file into the job log so the published metadata can be inspected.
echo "pyproject.toml:"
cat pyproject.toml
# Install dependencies and build the distribution artifacts.
poetry install
poetry build
# Write the resolved dependency set to requirements.txt.
poetry export -f requirements.txt --output requirements.txt
# Upload the built package.
poetry publish
9 changes: 6 additions & 3 deletions colbert_chunk_size_and_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@

batch_size = 640

astra_token = os.getenv("ASTRA_DB_TOKEN_COLBERT2")
database_id = os.getenv("ASTRA_DB_ID_COLBERT2")
keyspace = "ragulate"
astra_token = os.getenv("ASTRA_DB_TOKEN")
database_id = os.getenv("ASTRA_DB_ID")
keyspace = "colbert"

import logging
logging.basicConfig(level=logging.INFO)
logging.getLogger("unstructured").setLevel(logging.ERROR)
logging.getLogger("cassandra").setLevel(logging.ERROR)
logging.getLogger("http").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)



def get_embedding_model(chunk_size: int) -> ColbertEmbeddingModel:
Expand Down
4 changes: 1 addition & 3 deletions open_ai_chunk_size_and_k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
import os

from langchain_astradb import AstraDBVectorStore
Expand Down Expand Up @@ -26,12 +25,11 @@ def ingest(file_path: str, chunk_size: int, **kwargs):
vector_store = get_vector_store(chunk_size=chunk_size)

chunk_overlap = min(chunk_size / 4, min(chunk_size / 2, 64))
logging.info(f"Using chunk_overlap: {chunk_overlap} for chunk_size: {chunk_size}")

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
model_name=EMBEDDING_MODEL,
chunk_size=chunk_size,
chunk_overlap=50,
chunk_overlap=chunk_overlap,
)

docs = UnstructuredFileLoader(
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ langchain-openai = "0.1.3"
pytest = "^8.2.2"

[build-system]
requires = ["poetry-core", "setuptools>=42", "wheel", "pip"]
requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
ragulate = "ragulate.cli:main"
test = "pytest"
test_unit = "scripts.test_unit_runner:main"
test_integration = "scripts.test_integration_runner:main"
5 changes: 5 additions & 0 deletions scripts/test_integration_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import pytest
import sys

def main():
    """Run pytest on ``tests/integration_tests`` and exit with its return code."""
    test_targets = ["tests/integration_tests"]
    exit_status = pytest.main(test_targets)
    # Hand pytest's result to the process exit status so callers can detect failures.
    sys.exit(exit_status)
5 changes: 5 additions & 0 deletions scripts/test_unit_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import pytest
import sys

def main():
    """Run pytest on ``tests/unit_tests`` and exit with its return code."""
    test_targets = ["tests/unit_tests"]
    exit_status = pytest.main(test_targets)
    # Hand pytest's result to the process exit status so callers can detect failures.
    sys.exit(exit_status)

0 comments on commit 8be1644

Please sign in to comment.