Lucylililiwang: Fix error handling and clean up client #438

Open · wants to merge 13 commits into main
49 changes: 49 additions & 0 deletions Fix_error_handling_and_clean_up_client.py
@@ -0,0 +1,49 @@
# Import the required libraries
import os
import requests

def generate_text(prompt, max_length=50):
try:
# Retrieve OpenAI API key from environment variable
openai_key = os.getenv('OPENAI_API_KEY')
if not openai_key:
            raise EnvironmentError("OpenAI API key not found in environment variables")

        url = "https://api.openai.com/v1/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {openai_key}"
        }

        # Prepare the JSON payload
data = {
"model": "text-davinci-003",
"prompt": prompt,
"max_tokens": max_length
}

        # Make a POST request to the OpenAI API
        response = requests.post(url, headers=headers, json=data)
# Raise HTTPError for non-2xx status codes
response.raise_for_status()

        # Parse the response JSON
        result = response.json()
        generated_text = result['choices'][0]['text'].strip()
        return generated_text

    except Exception as e:
        print(f"Error occurred: {e}")
        return None


# Define the entry point of the script
if __name__ == "__main__":
prompt = "What is the meaning of life?"
max_length = 100

    generated_text = generate_text(prompt, max_length)
    if generated_text:
        print("Generated text:", generated_text)
    else:
        print("Failed to generate text")
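One hardening note on the request above: requests.post blocks indefinitely by default. A minimal sketch of the same call with an explicit timeout and separate timeout handling, assuming the url, headers, and data variables from the diff:

# Sketch: bound the request time and surface timeouts separately.
try:
    response = requests.post(url, headers=headers, json=data, timeout=10)
    response.raise_for_status()
except requests.exceptions.Timeout:
    print("Request to the OpenAI API timed out")
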
25 changes: 25 additions & 0 deletions LoRa.py
@@ -0,0 +1,25 @@
# Import the required libraries
from network import LoRa
import socket
import time

# Initialize the LoRa module (raw LoRa mode, 915 MHz)
lora = LoRa(mode=LoRa.LORA, frequency=915000000)

# Create a raw LoRa socket
s = socket.socket(socket.AF_LORA, socket.SOCK_RAW)

# Set the LoRa socket to non-blocking
s.setblocking(False)

# Main send/receive loop
while True:
    # Send some data
    s.send("Hello LoRa")

    # Check for incoming messages
data = s.recv(64)
if data:
print("Received", data)

time.sleep(2)
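On some MicroPython ports a non-blocking recv raises OSError with EAGAIN instead of returning empty bytes when nothing has arrived. A defensive sketch of the receive step, assuming the socket s from the diff:

import errno

# Sketch: treat EAGAIN as "no packet yet" rather than a crash.
try:
    data = s.recv(64)
except OSError as e:
    if e.errno != errno.EAGAIN:
        raise
    data = None
if data:
    print("Received", data)
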
Empty file added dataset.csv
Empty file.
44 changes: 44 additions & 0 deletions evaluation_script.py
@@ -0,0 +1,44 @@
# Import the required libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Define a custom dataset class if needed
class CustomDataset(Dataset):
def __init__(self, data):
self.data = data

def __len__(self):
return len(self.data)

def __getitem__(self, idx):
return self.data[idx]

# Define the evaluation function
def evaluate_models(model_names, test_dataset, tokenizer, device="cuda" if torch.cuda.is_available() else "cpu"):
for model_name in model_names:
print(f"Evaluating model: {model_name}")

        # Load the fine-tuned model
        model = AutoModelForCausalLM.from_pretrained(f"fine_tuned_{model_name}").to(device)

        # Create a data loader for evaluation
        test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

        # Evaluation loop
total_loss = 0.0
total_samples = 0

model.eval()
with torch.no_grad():
            for batch in tqdm(test_loader, desc="Evaluating"):
input_ids = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)["input_ids"].to(device)
labels = input_ids.clone()

                outputs = model(input_ids, labels=labels)

                loss = outputs.loss
                total_loss += loss.item() * input_ids.size(0)
                total_samples += input_ids.size(0)

        # Report the average per-sample loss for this model
        avg_loss = total_loss / total_samples
        print(f"Average Loss for {model_name}: {avg_loss}")
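The script defines evaluate_models but never invokes it. A minimal entry point, as a sketch mirroring the fine-tuning script below; the dataset contents and tokenizer name are placeholders:

if __name__ == "__main__":
    model_names = ["facebook/opt-125m", "facebook/opt-350m"]
    # Placeholder data; in practice load the held-out split from preprocessing_script.py
    test_dataset = CustomDataset(["example sentence one", "example sentence two"])
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
    evaluate_models(model_names, test_dataset, tokenizer)
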
69 changes: 69 additions & 0 deletions fine_tuning_script.py.py
@@ -0,0 +1,69 @@
# Import the required libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Define a custom dataset class if needed
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Define the fine-tuning function
def fine_tune_models(model_names, train_dataset, tokenizer, num_epochs=3, batch_size=8, device="cuda" if torch.cuda.is_available() else "cpu"):
for model_name in model_names:
print(f"Fine-tuning model: {model_name}")

        # Load the pre-trained model
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

        # Define the optimizer and LR scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

        # Create the data loader for training
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Fine-tuning loop
for epoch in range(num_epochs):
model.train()
total_loss = 0.0

for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
input_ids = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)["input_ids"].to(device)
labels = input_ids.clone()

optimizer.zero_grad()
outputs = model(input_ids, labels=labels)

loss = outputs.loss
total_loss += loss.item()

                loss.backward()
                optimizer.step()

            # Step the LR scheduler once per epoch
            scheduler.step()

            # Print the average loss for the epoch
            print(f"Average Loss: {total_loss / len(train_loader)}")

        # Save the fine-tuned model
model.save_pretrained(f"fine_tuned_{model_name}")

if __name__ == "__main__":
    # Define the models to fine-tune
model_names = [
"facebook/opt-125m",
"facebook/opt-350m",
]

    # Load the preprocessed data and tokenizer
train_dataset = CustomDataset(...)
tokenizer = AutoTokenizer.from_pretrained("tokenizer-name")

    # Fine-tune the models
fine_tune_models(model_names, train_dataset, tokenizer)
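The CustomDataset(...) placeholder still needs real data. One way to wire it to the train.csv written by preprocessing_script.py, as a sketch (the text_columns column name is the same assumption that script makes):

import pandas as pd

# Sketch: feed the raw text column of the preprocessed split into the dataset.
train_df = pd.read_csv("train.csv")
train_dataset = CustomDataset(train_df["text_columns"].tolist())
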
20 changes: 20 additions & 0 deletions kubernetes_support
@@ -0,0 +1,20 @@
# Import the required library
from kubernetes import client, config

def main():
    # Load the local kubeconfig (see the in-cluster fallback sketch below)
    config.load_kube_config()

    # Create the Kubernetes CoreV1 API client
api_instance = client.CoreV1Api()

    # List pods across all namespaces with their IPs
print("Listing pods with their IPs:")
ret = api_instance.list_pod_for_all_namespaces(watch=False)
for i in ret.items:
print(f"{i.metadata.name} : {i.status.pod_ip}")


# Define the entry point of the script
if __name__ == "__main__":
main()
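config.load_kube_config() fails when the script runs inside a cluster, where the service-account config applies instead. A sketch of the usual fallback using the client library's ConfigException:

from kubernetes import config
from kubernetes.config import ConfigException

# Sketch: prefer in-cluster config, fall back to the local kubeconfig.
try:
    config.load_incluster_config()
except ConfigException:
    config.load_kube_config()
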
43 changes: 43 additions & 0 deletions preprocessing_script.py
@@ -0,0 +1,43 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer

# Define a function to preprocess the data
def preprocess_data(csv_file, tokenizer_name, max_length=512, test_size=0.1, random_state=42):
# We first load the csv file into a DataFrame
df = pd.read_csv(csv_file)

    # Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    # Tokenize the text and add the tokenized sequences to the DataFrame
tokenized_sequences = []
# adjust text_columns according to your dataset
for text in df['text_columns']:
tokenized_text = tokenizer.encode(text, max_length=max_length, truncation=True)
tokenized_sequences.append(tokenized_text)

    df['tokenized_text'] = tokenized_sequences

    # Split the data into training and testing sets
train_df, test_df = train_test_split(df, test_size=test_size, random_state=random_state)

return train_df, test_df

# Define the entry point of the script
if __name__ == "__main__":
    # Set the parameters
csv_file = "path/to/your/dataset.csv"
tokenizer_name = "tokenizer-name"
max_length = 512
test_size = 0.1
random_state = 42

    # Preprocess the data
    train_df, test_df = preprocess_data(csv_file, tokenizer_name, max_length=max_length, test_size=test_size, random_state=random_state)

    # Save the preprocessed data if needed
train_df.to_csv("train.csv", index=False)
test_df.to_csv("test.csv", index=False)

    # TODO: room for future improvement
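One caveat on the CSV round trip: to_csv stringifies the list-valued tokenized_text column, so a consumer has to parse it back. A sketch of serializing the token IDs as JSON so they survive the round trip, using the column name from the diff:

import json
import pandas as pd

# Sketch: write token-ID lists as JSON strings ...
train_df['tokenized_text'] = train_df['tokenized_text'].apply(json.dumps)
train_df.to_csv("train.csv", index=False)

# ... and parse them back on load.
loaded = pd.read_csv("train.csv")
loaded['tokenized_text'] = loaded['tokenized_text'].apply(json.loads)
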
33 changes: 33 additions & 0 deletions push_pypi.yml
@@ -0,0 +1,33 @@
name: Publish to PyPI

on:
push:
branches:
- main

jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'

- name: Install dependencies
run: pip install -r requirements.txt

- name: Build package
run: python setup.py sdist bdist_wheel


      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
          repository_url: https://upload.pypi.org/legacy/
83 changes: 83 additions & 0 deletions signup_and_Multi-Factor.py
@@ -0,0 +1,83 @@
# Import the required libraries
from flask import Flask, render_template, request, redirect, url_for, session
from flask_wtf.csrf import CSRFProtect
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import boto3

app = Flask(__name__)
csrf = CSRFProtect(app)

# Initialize the AWS configuration (placeholders; fill in real values)
AWS_REGION = 'us-east-1'  # 'Global' is not a valid AWS region; use your user pool's region
USER_POOL_ID = ''
APP_CLIENT_ID = ''
IDENTITY_POOL_ID = ''

# Load the pre-trained GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Initialize the Cognito client
cognito_client = boto3.client('cognito-idp', region_name=AWS_REGION)

# Define the routes
@app.route("/")
def index():
return render_template("index.html")

@app.route("/signup", methods=["GET","POST"])
def signup():
if request.method == "POST":
username = request.form["username"]
password = request.form["password"]
email = request.form["email"]

        # Step 1: Create a new user in the Cognito User Pool
try:
response = cognito_client.sign_up(
                ClientId=APP_CLIENT_ID,
                Username=username,
Password=password,
UserAttributes=[
{'Name': 'email', 'Value': email},
],
)
user_sub = response['UserSub']
session['user_sub'] = user_sub
return redirect(url_for('mfa_verification'))
except cognito_client.exceptions.UsernameExistsException:
            return render_template("signup.html", error="Username already exists. Choose a different one.")
except cognito_client.exceptions.UserLambdaValidationException as e:
return render_template("signup.html", error=f"Error: {e}")
except Exception as e:
return render_template("signup.html", error=f"Error: {e}")

return render_template("signup.html")

@app.route("/mfa_verification", methods=["GET", "POST"])
def mfa_verification():
if 'user_sub' not in session:
return redirect(url_for('signup'))
    if request.method == "POST":
mfa_code = request.form["mfa_code"]

        # Step 2: Verify the MFA code
        # NOTE: verify_software_token expects a Cognito access token from a completed
        # auth flow; the user_sub saved at signup is not an access token, so this call
        # needs a real token in production (see the sketch after this file).
        try:
            cognito_client.verify_software_token(
                AccessToken=session['user_sub'],
                UserCode=mfa_code,
)

return render_template("index.html", prompt="", generated_text="MFA verification successful.")
except cognito_client.exceptions.CodeMismatchException:
return render_template("mfa_verification.html", error="Incorrect MFA code. Please try again.")

except Exception as e:
            return render_template("mfa_verification.html", error=f"Error: {e}")

return render_template("mfa_verification.html")

if __name__ == "__main__":
    # NOTE: load a real secret key from configuration in production
    app.secret_key = 'supersecretkey'
app.run(debug=True)
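A gap in the MFA flow: verify_software_token only succeeds after a TOTP token has been associated with the user's session, and it expects a real access token rather than the user_sub. A sketch of the missing association step with boto3's Cognito API; access_token and first_totp_code are illustrative names, assumed to come from a completed auth flow and the user's authenticator app:

# Sketch: associate a TOTP software token, then verify the user's first code.
assoc = cognito_client.associate_software_token(AccessToken=access_token)
secret_code = assoc['SecretCode']  # shown to the user, e.g. as a QR code

cognito_client.verify_software_token(
    AccessToken=access_token,
    UserCode=first_totp_code,
)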
