Skip to content

Commit

Permalink
[Demo] All files needed for demo run (#1)
Browse files Browse the repository at this point in the history
* all demo parts

* small fix to notebook

* notebook documentation
  • Loading branch information
ZeevRispler authored Sep 30, 2024
1 parent d6e1ae4 commit a9d7e2e
Show file tree
Hide file tree
Showing 24 changed files with 1,953 additions and 0 deletions.
Empty file added jewelry/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions jewelry/data/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from chromadb.config import Settings

def load_environment():
    """Load environment variables via ``load_dotenv`` and require ``OPENAI_API_KEY``.

    When the key is missing from ``os.environ`` after loading, an error message
    is printed and the process exits with status 1.
    """
    load_dotenv()

    # Nothing more to do once the key is available.
    if "OPENAI_API_KEY" in os.environ:
        return

    print("Error: OPENAI_API_KEY is not set in the .env file or environment.")
    sys.exit(1)

def ingest_to_chroma(file_path, persist_directory='./chroma_db'):
    """Split a text file into chunks, embed them with OpenAI and store them in Chroma.

    :param file_path:         Path of the text file to ingest. The file is read as UTF-8.
    :param persist_directory: Directory passed to Chroma (both in the client settings and to
                              ``Chroma.from_texts``) for its persisted data.
    """
    # Explicit encoding: without it the decoding depends on the platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    # 1000-character chunks with a 200-character overlap between consecutive chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(text)

    embeddings = OpenAIEmbeddings()

    # Initialize Chroma with persistence
    chroma_settings = Settings(
        chroma_db_impl='duckdb+parquet',
        persist_directory=persist_directory,  # This directory will store the persisted data
    )

    # NOTE(review): no explicit persist call is made on the returned store; confirm that the
    # installed chromadb/langchain versions flush to `persist_directory` on their own.
    Chroma.from_texts(
        texts=chunks,
        embedding=embeddings,
        client_settings=chroma_settings,
        persist_directory=persist_directory,
    )

    print(f"Text from {file_path} has been successfully ingested into Chroma and persisted.")

if __name__ == "__main__":
    # Exactly one positional argument is expected: the path of the text file to ingest.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python script_name.py <path_to_text_file>")
        sys.exit(1)

    (file_path,) = cli_args

    # Validate the path before loading the environment, so a wrong path fails first.
    path_found = os.path.exists(file_path)
    if not path_found:
        print(f"Error: File {file_path} does not exist.")
        sys.exit(1)

    load_environment()
    ingest_to_chroma(file_path)
60 changes: 60 additions & 0 deletions jewelry/data/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
from sqlalchemy import create_engine
import pandas as pd
from jewelry.data.sql_db import drop_tables, create_tables, get_engine, get_items, get_user_items_purchases_history
from genai_factory.actions import ingest
def init_sql_db(data_path: str = "data", mock_data_path: str = "./data/mock_data", reset: bool = True):
    """
    Initialize the SQL database and load the mock data if available.

    :param data_path:      Directory holding the SQLite file ``sql.db``. Created when missing.
    :param mock_data_path: Directory containing the mock CSV files. Pass an empty value to skip loading.
    :param reset:          Whether to drop the tables before (re)creating them.
    """
    # Create the base data path if it doesn't exist (`exist_ok` avoids a check-then-create race):
    os.makedirs(data_path, exist_ok=True)

    # Connect to the SQL database:
    sql_connection_url = f"sqlite:///{data_path}/sql.db"
    engine = get_engine(sql_connection_url=sql_connection_url)

    # Drop the tables if reset is required:
    if reset:
        drop_tables(engine=engine)

    # Create the tables:
    create_tables(engine=engine)

    # Check if needed to load mock data:
    if not mock_data_path:
        return

    # CSV file stem -> destination table name, in insertion order:
    csv_to_table = {
        "products": "product",
        "items": "item",
        "users": "user",
        "stocks": "stock",
        "purchases": "purchase",
        "stock_to_purchase": "stock_to_purchase",
        "reviews": "review",
    }

    # Load all the mock data first, so a missing CSV aborts before any mock rows are written:
    frames = {
        table: pd.read_csv(os.path.join(mock_data_path, f"{stem}.csv"))
        for stem, table in csv_to_table.items()
    }

    # Insert the mock data into tables.
    # NOTE(review): `if_exists="replace"` makes pandas drop and recreate each table, which presumably
    # discards whatever schema `create_tables` defined for it - confirm whether "append" is intended.
    for table, frame in frames.items():
        frame.to_sql(name=table, con=engine, if_exists="replace", index=False)


if __name__ == "__main__":
    # Build the demo database with the default paths, then run two sample queries against it.
    init_sql_db()
    engine = get_engine("sqlite:///data/sql.db")

    # Sample query 1: stone-less white gold rings and bracelets.
    items = get_items(
        engine=engine,
        kinds=["rings", "bracelets"],
        stones=["no stones"],
        metals=["white gold"],
    )
    print(items)

    # Sample query 2: purchase history of the user with id "6".
    items = get_user_items_purchases_history(engine=engine, user_id="6")
    print(items)


51 changes: 51 additions & 0 deletions jewelry/data/mock_data/documents/customer_policy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
## Customer Policy for Iguazio Jewelry

**General:**

* We strive to provide excellent customer service and high-quality jewelry products.
* We reserve the right to modify this policy at any time without prior notice.
* By placing an order with us, you acknowledge and agree to be bound by the terms of this policy.

**Orders:**

* We accept orders placed on our website 24/7.
* Orders are processed within 1 day after they are placed.
* Order confirmation emails will be sent to the email address provided at checkout.
* We reserve the right to cancel any order for any reason.

**Payments:**

* We accept Visa and Mastercard.
* All payments are processed securely through a trusted payment gateway.
* Your credit card information is never stored on our servers.

**Shipping:**

* We offer a variety of shipping options with varying delivery times and costs.
* Shipping costs are calculated based on the weight, size, and destination of your order.
* You will be able to choose your preferred shipping option at checkout.
* We are not responsible for lost, stolen, or damaged packages once they have been shipped.

**Returns and Exchanges:**

* We accept returns and exchanges within 1 month of the purchase date.
* Items must be returned in their original packaging, unworn, and in new condition.
* We do not offer refunds for shipping costs on returned items.
* Please contact us for a return authorization before sending any items back.

**Warranties:**

* All our jewelry comes with a 5-year warranty against manufacturing defects.
* This warranty does not cover damage caused by normal wear and tear, misuse, or accidents.
* Please contact us for warranty claims.

**Privacy:**

* We respect your privacy and are committed to protecting your personal information.
* We will not share your personal information with any third party without your consent.
* You can review our full privacy policy on our website.

**Contact Us:**

* If you have any questions or concerns, please contact us via email at [email protected] or phone at 123456789.
* We will reply to your inquiries as soon as possible.
20 changes: 20 additions & 0 deletions jewelry/data/mock_data/items.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
item_id,date_added,description,colors,metals,stones,stones_carat_total_weight,product_id,image
1,2024-02-15,"A luxurious yellow gold ring adorned with sparkling diamonds, exuding timeless elegance and sophistication.","yellow, clear",yellow gold,diamonds,1.5,1,1.png
2,2024-02-15,"A mesmerizing white gold ring featuring a stunning arrangement of diamonds, symbolizing everlasting beauty and opulence.","white, clear",white gold,diamonds,1.5,1,2.png
3,2024-02-20,"A radiant pink gold bracelet, exuding regal charm and sophistication.",pink,pink gold,no stones,0.0,2,3.png
4,2024-02-20,"An exquisite white gold bracelet, epitomizing luxury and grace.",white,white gold,no stones,0.0,2,4.png
5,2024-02-25,"A majestic yellow gold necklace adorned with glistening diamonds, exuding royal elegance and grandeur.","yellow, clear",yellow gold,diamonds,3.0,3,5.png
6,2024-02-25,"An opulent white gold necklace featuring a breathtaking arrangement of diamonds, capturing the essence of luxury and sophistication.","white, clear",white gold,diamonds,3.0,3,6.png
7,2024-03-01,"Luxurious pink gold earrings adorned with sparkling diamonds, radiating opulent glamour and sophistication.","pink, clear",pink gold,diamonds,2.2,4,7.png
8,2024-03-01,"Elegant white gold earrings featuring a dazzling array of diamonds, exuding timeless beauty and refinement.","white, clear",white gold,diamonds,2.2,4,8.png
9,2024-02-15,"A timeless yellow gold ring adorned with sparkling diamonds, radiating elegance and grace.","yellow, clear",yellow gold,diamonds,1.0,5,9.png
10,2024-02-15,"An exquisite pink gold ring featuring a delicate arrangement of diamonds, epitomizing sophistication and charm.","pink, clear",pink gold,diamonds,1.0,5,10.png
11,2024-03-04,"An elegant white gold necklace adorned with a breathtaking butterfly pendant, symbolizing transformation and beauty.",white,white gold,no stones,0.0,6,11.png
12,2024-03-04,"An elegant pink gold necklace adorned with a breathtaking butterfly pendant, symbolizing transformation and beauty.",pink,pink gold,no stones,0.0,6,12.png
13,2024-03-04,"A mesmerizing white gold necklace featuring delicate leaf motifs and sparkling diamond accents, evoking the enchanting beauty of a mystical forest.",white,white gold,diamonds,0.7,7,13.png
14,2024-03-04,"A mesmerizing yellow gold necklace featuring delicate leaf motifs and sparkling diamond accents, evoking the enchanting beauty of a mystical forest.",yellow,yellow gold,diamonds,0.7,7,14.png
15,2024-02-15,"An enchanting white gold ring adorned with a heart shaped diamond, symbolizing eternal love and romance.","white, clear",white gold,diamonds,1.8,8,15.png
16,2024-02-15,"An enchanting pink gold ring adorned with a heart shaped diamond, symbolizing eternal love and romance.","pink, clear",pink gold,diamonds,1.8,8,16.png
17,2024-02-15,"An enchanting yellow gold ring adorned with a heart shaped diamond, symbolizing eternal love and romance.","yellow, clear",yellow gold,diamonds,1.8,8,17.png
18,2024-02-25,"An exquisite chain pink gold necklace, capturing the essence of timeless elegance and sophistication.",pink,pink gold,no stones,0.0,9,18.png
19,2024-02-25,"An exquisite chain yellow gold necklace, capturing the essence of timeless elegance and sophistication.",yellow,yellow gold,no stones,0.0,9,19.png
153 changes: 153 additions & 0 deletions jewelry/data/mock_data/mocker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import datetime
import json
import os
import random
import uuid
from typing import List, Tuple

import pandas as pd


def _generate_stocks(
    products: List[dict], items: List[dict], min_amount: int = 0, max_amount: int = 5
) -> List[dict]:
    """
    Generate one stock record per (item, size) combination.

    A price is drawn once per item: a random multiple of 50 (50..500) on top of a base price for the
    product kind, tripled for diamond items, plus 200 for white gold. All sizes of an item share
    that price; the amount in stock is drawn per size.

    :param products:   Product records; each needs "product_id" and "kind".
    :param items:      Item records; each needs "item_id", "product_id", "stones" and "metals".
    :param min_amount: Minimum amount in stock (inclusive).
    :param max_amount: Maximum amount in stock (inclusive).

    :returns: Stock records with the keys "stock_id", "item_id", "size", "amount" and "price".

    :raises ValueError: If an item references a product id that is not in `products`.
    """
    price_map = {"rings": 400, "bracelets": 800, "necklaces": 1000, "earrings": 200}
    size_map = {
        "rings": [3, 5, 7, 9],
        "bracelets": [6.3, 6.7, 7.1, 7.9, 9],
        "necklaces": [16, 17.7, 19.4],
        "earrings": [1],
    }

    # Index the products once instead of scanning the list for every item. Iterating in reverse
    # keeps the first occurrence of a duplicated product id, like a front-to-back scan would.
    product_by_id = {p["product_id"]: p for p in reversed(products)}

    stocks = []
    for item in items:
        product = product_by_id.get(item["product_id"])
        if product is None:
            # A bare `next()` would leak StopIteration here; fail with a descriptive error instead.
            raise ValueError(
                f"Item {item['item_id']!r} references unknown product id {item['product_id']!r}"
            )
        kind = product["kind"]

        price = random.randint(1, 10) * 50 + price_map[kind]
        if item["stones"] == "diamonds":
            price *= 3
        if item["metals"] == "white gold":
            price += 200

        for size in size_map[kind]:
            stocks.append(
                {
                    "stock_id": uuid.uuid4().hex,
                    "item_id": item["item_id"],
                    "size": size,
                    "amount": random.randint(min_amount, max_amount),
                    "price": price,
                }
            )

    return stocks


def _generate_purchases(
    users: List[dict],
    stocks: List[dict],
    min_date: str,
    max_date: str,
    min_amount: int = 1,
    max_amount: int = 4,
    num_purchases: int = 10,
) -> Tuple[List[dict], List[dict]]:
    """
    Generate random purchases and the purchase-to-stock link records.

    Each purchase picks a random user, a random number of distinct stocks and a random date between
    `min_date` and `max_date` (both inclusive).

    :param users:         User records; each needs "user_id".
    :param stocks:        Stock records; each needs "stock_id".
    :param min_date:      Earliest purchase date, formatted "%m/%d/%Y".
    :param max_date:      Latest purchase date, formatted "%m/%d/%Y".
    :param min_amount:    Minimum number of stocks per purchase (inclusive).
    :param max_amount:    Maximum number of stocks per purchase (inclusive).
    :param num_purchases: Number of purchases to generate.

    :returns: A tuple of (purchases, stock_to_purchase). Purchases have the keys "user_id", "date"
              (formatted "%m/%d/%Y") and "purchase_id"; links have "purchase_id" and "stock_id".
    """
    first_day = datetime.datetime.strptime(min_date, "%m/%d/%Y")
    last_day = datetime.datetime.strptime(max_date, "%m/%d/%Y")
    span_days = (last_day - first_day).days

    purchases = []
    stock_to_purchase = []
    for _ in range(num_purchases):
        user_id = random.choice(users)["user_id"]

        # `random.sample` raises when asked for more elements than exist, so cap the basket size
        # by the number of available stocks.
        basket_size = min(random.randint(min_amount, max_amount), len(stocks))
        basket = random.sample(stocks, basket_size)

        purchase_id = uuid.uuid4().hex
        date = first_day + datetime.timedelta(days=random.randint(0, span_days))

        purchases.append(
            {
                "user_id": user_id,
                "date": date.strftime("%m/%d/%Y"),
                "purchase_id": purchase_id,
            }
        )
        stock_to_purchase.extend(
            {"purchase_id": purchase_id, "stock_id": stock["stock_id"]} for stock in basket
        )

    return purchases, stock_to_purchase


def _generate_reviews(
    stocks: List[dict],
    purchases: List[dict],
    stock_to_purchase: List[dict],
    review_chance: float = 0.5
) -> List[dict]:
    """
    Generate random reviews for purchased stocks.

    Every purchase-to-stock link gets a review with probability `review_chance`. The review is dated
    1 to 7 days after the purchase and carries a random rating between 1 and 5; it counts as a
    recommendation when the rating is above 3.

    :param stocks:            Stock records; each needs "stock_id" and "item_id".
    :param purchases:         Purchase records; each needs "purchase_id", "user_id" and "date"
                              (formatted "%m/%d/%Y").
    :param stock_to_purchase: Link records; each needs "purchase_id" and "stock_id".
    :param review_chance:     Probability (0.0 to 1.0) that a purchased stock is reviewed.

    :returns: Review records with the keys "review_id", "item_id", "user_id", "date" (a
              `datetime.datetime`, not a string), "text", "rating" and "is_recommend".
    """
    # Index the records once instead of scanning the lists for every link. Iterating in reverse keeps
    # the first occurrence of a duplicated id, like a front-to-back scan would.
    purchase_by_id = {p["purchase_id"]: p for p in reversed(purchases)}
    stock_by_id = {s["stock_id"]: s for s in reversed(stocks)}

    reviews = []
    for stp in stock_to_purchase:
        # `random.random()` is in [0.0, 1.0): skip unless the draw falls below the chance, so a
        # chance of 1.0 always reviews and a chance of 0.0 never does.
        if random.random() >= review_chance:
            continue
        purchase = purchase_by_id[stp["purchase_id"]]
        stock = stock_by_id[stp["stock_id"]]
        date = datetime.datetime.strptime(purchase["date"], "%m/%d/%Y")
        date += datetime.timedelta(days=random.randint(1, 7))
        rating = random.randint(1, 5)
        reviews.append(
            {
                "review_id": uuid.uuid4().hex,
                "item_id": stock["item_id"],
                "user_id": purchase["user_id"],
                "date": date,
                "text": "",  # TODO: Generate a review based on randomize score using ChatGPT
                "rating": rating,
                "is_recommend": rating > 3,
            }
        )

    return reviews


def generate_mock_data(
    sources_directory: str = "./sources", output_directory: str = "./"
):
    """
    Generate the mock data CSV files out of the JSON sources.

    Reads "products.json", "items.json" and "users.json" from `sources_directory`, generates random
    stocks, purchases and reviews on top of them, and writes one CSV per table into
    `output_directory`.

    :param sources_directory: Directory containing the JSON source files (read as UTF-8).
    :param output_directory:  Directory to write the CSV files to. Created when missing.
    """
    # Load the sources:
    sources = {}
    for name in ("products", "items", "users"):
        with open(os.path.join(sources_directory, f"{name}.json"), encoding="utf-8") as json_file:
            sources[name] = json.load(json_file)
    products, items, users = sources["products"], sources["items"], sources["users"]

    # Generate the random records:
    stocks = _generate_stocks(products=products, items=items)
    purchases, stock_to_purchase = _generate_purchases(
        users=users, stocks=stocks, min_date="05/03/2024", max_date="07/03/2024"
    )
    reviews = _generate_reviews(stocks=stocks, purchases=purchases, stock_to_purchase=stock_to_purchase)

    # Build the dataframes, parsing the "%m/%d/%Y" date strings into datetimes:
    products_df = pd.DataFrame(products)
    items_df = pd.DataFrame(items)
    items_df['date_added'] = pd.to_datetime(items_df['date_added'], format='%m/%d/%Y')
    users_df = pd.DataFrame(users)
    users_df['date_of_birth'] = pd.to_datetime(users_df['date_of_birth'], format='%m/%d/%Y')
    stocks_df = pd.DataFrame(stocks)
    purchases_df = pd.DataFrame(purchases)
    purchases_df['date'] = pd.to_datetime(purchases_df['date'], format='%m/%d/%Y')
    stock_to_purchase_df = pd.DataFrame(stock_to_purchase)
    # Reviews are random, so the list may be empty. Naming the columns keeps the 'date' column (and
    # the CSV header) present in that case instead of failing with a KeyError.
    reviews_df = pd.DataFrame(
        reviews,
        columns=["review_id", "item_id", "user_id", "date", "text", "rating", "is_recommend"],
    )
    if not reviews_df.empty:
        reviews_df['date'] = pd.to_datetime(reviews_df['date'], format='%m/%d/%Y')

    # Make sure the output directory exists (an empty path means the current directory):
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    # Write the CSV files:
    outputs = {
        "products.csv": products_df,
        "items.csv": items_df,
        "users.csv": users_df,
        "stocks.csv": stocks_df,
        "purchases.csv": purchases_df,
        "stock_to_purchase.csv": stock_to_purchase_df,
        "reviews.csv": reviews_df,
    }
    for file_name, frame in outputs.items():
        frame.to_csv(os.path.join(output_directory, file_name), index=False)


# Script entry point: generate the mock data CSV files using the default source and output directories.
if __name__ == "__main__":
    generate_mock_data()
10 changes: 10 additions & 0 deletions jewelry/data/mock_data/products.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
product_id,name,kind,collections,gifts
1,Eternal Splendor Ring,rings,Forever Diamonds,"wife, anniversary"
2,Regal Enchantment Bracelet,bracelets,McKinsey Elegance,"wife, anniversary"
3,Royal Splendor Necklace,necklaces,Forever Diamonds,"wife, anniversary"
4,Opulent Glamour Earrings,earrings,Forever Diamonds,"wife, anniversary"
5,Timeless Grace Ring,rings,Forever Diamonds,"wife, anniversary"
6,Enchanted Forest Necklace,necklaces,Iguazio Nature Capture,"mom, birthday"
7,Enchanted Leaf Necklace,necklaces,"Iguazio Nature Capture, Forever Diamonds","best friend, mom, birthday"
8,Enchanted Love Ring,rings,Forever Diamonds,"wife, anniversary, best friend"
9,Majestic Chain Necklace,necklaces,Timeless,"wife, graduation, best friend, mom"
11 changes: 11 additions & 0 deletions jewelry/data/mock_data/purchases.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
user_id,date,purchase_id
5,2024-05-11,e12e1d1139e3481b8aa53a88cb95a156
2,2024-05-20,1abc839fe844479f9154e5c683264436
1,2024-05-29,773ca8faaf17471a82c0792e2ffeb661
3,2024-07-01,02a941880e674f82ae0430c404c7f62b
3,2024-05-09,72d033b842474d96aaaaf94153a6dc45
3,2024-05-03,f469aa686ea24023b0241786068eab1f
1,2024-05-04,8751e73e3f4b4b758931de66506f71a7
3,2024-05-11,80d7415470294775aec85bffc41f852d
4,2024-06-13,d4592d31cafd44cb8a64a44e64aff624
3,2024-06-29,f315be28cd9f469ea713116faf49b123
16 changes: 16 additions & 0 deletions jewelry/data/mock_data/reviews.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
review_id,item_id,user_id,date,text,rating,is_recommend
75bcf4035378468d8251a58519b05e3e,14,5,2024-05-16,,5,True
28ec7b810ad54bb88907344646107949,2,5,2024-05-18,,4,True
14d7c597917a440c9bff6a8d510a4b9f,6,2,2024-05-24,,4,True
d622a7d9a3884e2f91e05cfbca053c7a,10,2,2024-05-23,,3,False
28bde0397b9a4336929f77b55a3a1bae,2,2,2024-05-25,,5,True
66300279773e40709cc4348da4a814d6,10,1,2024-06-02,,1,False
4c7a9c600eef46cfad6322ed2e14cfa2,6,3,2024-07-02,,1,False
97b4a35f9cdf48479ae3361ecbcbada8,12,3,2024-05-06,,4,True
d7cfcc1c2caa47749439cce3312fd32b,6,3,2024-05-10,,3,False
cd25c018501040a18bf35891561b4e84,9,3,2024-05-07,,2,False
b06e34a24c454af582780b017af37c4c,5,1,2024-05-11,,3,False
a6195ed5ccbd4216b9bd34f79542d00a,17,3,2024-05-13,,4,True
dfd00d8808f44eb081a760688ba0de0a,6,4,2024-06-15,,4,True
fc437a4a1a0645568aca999c29fbb72b,1,3,2024-07-06,,3,False
8da2df90c7c641a88c5b5a174c5386a7,6,3,2024-07-04,,5,True
Loading

0 comments on commit a9d7e2e

Please sign in to comment.