diff --git a/README.md b/README.md
index 5bc9f17..edd6e05 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,13 @@
![welcome](assets/welcome.png)
+
+
diff --git a/build/lib/edg4llm/__init__.py b/build/lib/edg4llm/__init__.py
new file mode 100644
index 0000000..1cdaf9f
--- /dev/null
+++ b/build/lib/edg4llm/__init__.py
@@ -0,0 +1,7 @@
+from edg4llm.core.interface import EDG4LLM
+
+__all__ = ["EDG4LLM"]
+
+__version__ = "1.0.14"
+__author__ = "Alannikos"
+__license__ = "MIT"
diff --git a/build/lib/edg4llm/core/__init__.py b/build/lib/edg4llm/core/__init__.py
new file mode 100644
index 0000000..3f40e95
--- /dev/null
+++ b/build/lib/edg4llm/core/__init__.py
@@ -0,0 +1 @@
+from edg4llm.core.interface import EDG4LLM
diff --git a/build/lib/edg4llm/core/dataGenerators.py b/build/lib/edg4llm/core/dataGenerators.py
new file mode 100644
index 0000000..12bb8cf
--- /dev/null
+++ b/build/lib/edg4llm/core/dataGenerators.py
@@ -0,0 +1,253 @@
+import os
+from typing import Dict, List, Any
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.models.chatglm import EDGChatGLM
+from edg4llm.models.chatgpt import EDGChatGPT
+from edg4llm.models.internlm import EDGInternLM
+from edg4llm.models.deepseek import EDGDeepSeek
+from edg4llm.generators.text_generators.answer_generator import AnswerGenerator
+from edg4llm.generators.text_generators.question_generator import QuestionGenerator
+from edg4llm.generators.text_generators.dialogue_generator import DialogueGenerator
+
+from edg4llm.processor.preprocess import PreProcessor
+
+logger = custom_logger("dataGenerator")
+
+class DataGenerator:
+ def __init__(self, pConfig):
+ """
+ Initialize the Data Generator
+
+ This method initializes the model and its associated generators (Answer, Question, Dialogue)
+ based on the provided configuration parameters.
+
+ Parameters
+ ----------
+ pConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "model_provider" : str, optional
+ The type of language model to use ("chatglm", "chatgpt", "internlm", "deepseek"). Default is "chatglm".
+ - "model_name" : str, optional
+ The specific model to use within the selected provider. Default is "chatglm-4-flash".
+ - "base_url" : str
+ The base URL for the LLM API. Default is None.
+ - "api_key" : str
+ The API key for authenticating requests. Default is None.
+
+ Raises
+ ------
+ ValueError
+ If the provided model type is not supported, raises a `ValueError`.
+
+ Attributes
+ ----------
+ model : object
+ The selected language model instance, initialized based on the "model_provider" configuration.
+ answer_generator : AnswerGenerator
+ An instance of the AnswerGenerator to generate answers.
+ question_generator : QuestionGenerator
+ An instance of the QuestionGenerator to generate questions.
+ dialogue_generator : DialogueGenerator
+ An instance of the DialogueGenerator to generate dialogues.
+
+ Notes
+ -----
+ - Supported model providers include: "chatglm", "chatgpt", "internlm", "deepseek".
+ - If the "model_provider" is unsupported, a `ValueError` will be raised.
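+
+        Examples
+        --------
+        A minimal, illustrative configuration (the URL and key below are placeholders,
+        not real credentials):
+
+        >>> pConfig = {
+        ...     "model_provider": "chatglm",
+        ...     "model_name": "chatglm-4-flash",
+        ...     "base_url": "https://open.bigmodel.cn/api/paas/v4/chat/completions",
+        ...     "api_key": "your_api_key",
+        ... }
+        >>> generator = DataGenerator(pConfig)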
+ """
+
+ if pConfig["model_provider"] == "chatglm":
+ self.model = EDGChatGLM(
+ model_name=pConfig["model_name"],
+ base_url=pConfig["base_url"],
+ api_key=pConfig["api_key"]
+ )
+ elif pConfig["model_provider"] == "chatgpt":
+ self.model = EDGChatGPT(
+ model_name=pConfig["model_name"],
+ base_url=pConfig["base_url"],
+ api_key=pConfig["api_key"]
+ )
+ elif pConfig["model_provider"] == "internlm":
+ self.model = EDGInternLM(
+ model_name=pConfig["model_name"],
+ base_url=pConfig["base_url"],
+ api_key=pConfig["api_key"]
+ )
+ elif pConfig["model_provider"] == "deepseek":
+ self.model = EDGDeepSeek(
+ model_name=pConfig["model_name"],
+ base_url=pConfig["base_url"],
+ api_key=pConfig["api_key"]
+ )
+ else:
+ raise ValueError("Unsupported model provider")
+
+ self.preprocessor = PreProcessor()
+ self.answer_generator = AnswerGenerator(self.model)
+ self.question_generator = QuestionGenerator(self.model)
+ self.dialogue_generator = DialogueGenerator(self.model)
+
+ def generate_question(self, tConfig) -> List[Dict]:
+ """
+ Generate questions based on the given configuration.
+
+ This method uses the `question_generator` to generate question data based on
+ the provided configuration options. It supports various parameters to control
+ the question generation process, such as task type, prompts, sampling strategies, and output formatting.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "language" : str, optional
+            The language of the generated data. Must be one of 'zh' or 'en'.
+ Default is 'zh'.
+ - "task_type" : str, optional
+ The type of task for data generation. Must be 'question' to ensure valid output.
+ Default is 'question'.
+ - "system_prompt" : str, optional
+ A system-level prompt to guide the question generation. Default is None.
+ - "user_prompt" : str, optional
+ A user-provided prompt to initiate the question generation. Default is None.
+ - "do_sample" : bool, optional
+ Whether to use sampling during question generation. If True, enables sampling strategies like
+ temperature and top_p. If False, uses deterministic decoding. Default is True.
+ - "temperature" : float, optional
+ Sampling temperature to control randomness. Must be in the range [0.0, 1.0].
+ Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter for controlling randomness. Must be in the range [0.0, 1.0]. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens to generate in the question output. Default is 4095.
+ - "num_samples" : int, optional
+ The number of question samples to generate. Default is 10.
+ - "output_format" : str, optional
+ The format of the output, such as "alpaca" or other formats. Default is "alpaca".
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated question outputs.
+
+ Notes
+ -----
+ - This method uses the `generate` method from the `question_generator` to produce question data
+ based on the provided configuration.
+ - The `tConfig` dictionary allows for flexible question generation based on task type,
+ system/user prompts, and various sampling strategies.
+ """
+
+ tConfig["user_prompt"] = self.preprocessor.question_preprocess(tConfig["language"], tConfig["user_prompt"])
+
+ data = self.question_generator.generate(tConfig)
+ return data
+
+ def generate_answer(self, tConfig) -> List[Dict]:
+ """
+ Generate answers based on the given configuration.
+
+ This method uses the `answer_generator` to generate answer data based on
+ the provided configuration options. It supports various parameters to control
+ the answer generation process, such as task type, prompts, sampling strategies, and output formatting.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "language" : str, optional
+            The language of the generated data. Must be one of 'zh' or 'en'.
+ Default is 'zh'.
+ - "task_type" : str, optional
+ The type of task for data generation. Must be 'answer' to ensure valid output.
+ Default is 'answer'.
+ - "system_prompt" : str, optional
+ A system-level prompt to guide the answer generation. Default is None.
+ - "user_prompt" : str, optional
+ A user-provided prompt to initiate the answer generation. Default is None.
+ - "do_sample" : bool, optional
+ Whether to use sampling during answer generation. If True, enables sampling strategies like
+ temperature and top_p. If False, uses deterministic decoding. Default is True.
+ - "temperature" : float, optional
+ Sampling temperature to control randomness. Must be in the range [0.0, 1.0].
+ Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter for controlling randomness. Must be in the range [0.0, 1.0]. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens to generate in the answer output. Default is 4095.
+ - "num_samples" : int, optional
+ The number of answer samples to generate. Default is 10.
+ - "output_format" : str, optional
+ The format of the output, such as "json" or other formats. Default is "json".
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated answer outputs.
+
+ Notes
+ -----
+ - This method uses the `generate` method from the `answer_generator` to produce answer data
+ based on the provided configuration.
+ - The `tConfig` dictionary allows for flexible answer generation based on task type,
+ system/user prompts, and various sampling strategies.
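+
+        Examples
+        --------
+        A sketch of a typical answer-generation configuration, assuming ``generator`` is a
+        DataGenerator instance. It additionally relies on a "question_path" entry pointing
+        to a JSON file of questions (values below are illustrative). Note that "num_samples"
+        should match the number of questions in the file:
+
+        >>> tConfig = {
+        ...     "language": "en",
+        ...     "task_type": "answer",
+        ...     "system_prompt": "You are a helpful assistant.",
+        ...     "user_prompt": "Answer the following question.",
+        ...     "num_samples": 2,
+        ...     "question_path": "questions.json",
+        ... }
+        >>> data = generator.generate_answer(tConfig)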
+ """
+
+ tConfig["user_prompt"] = self.preprocessor.answer_preprocess(tConfig["language"], tConfig["user_prompt"])
+ data = self.answer_generator.generate(tConfig)
+ return data
+
+ def generate_dialogue(self, tConfig) -> List[Dict]:
+ """
+ Generate a dialogue based on the given configuration.
+
+ This method utilizes the `dialogue_generator` to generate dialogues using the
+ provided configuration options. It supports various parameters to control
+ the text generation process, such as task type, prompts, sampling strategies, and output formatting.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "language" : str, optional
+            The language of the generated data. Must be one of 'zh' or 'en'.
+ Default is 'zh'.
+ - "task_type" : str, optional
+ The type of task for data generation. Must be one of 'question', 'answer', or 'dialogue'.
+ Default is 'dialogue'.
+ - "system_prompt" : str, optional
+ A system-level prompt to guide the text generation. Default is None.
+ - "user_prompt" : str, optional
+ A user-provided prompt to initiate the text generation. Default is None.
+ - "do_sample" : bool, optional
+ Whether to use sampling during text generation. If True, enables sampling strategies like temperature
+ and top_p. If False, uses deterministic decoding. Default is True.
+ - "temperature" : float, optional
+ Sampling temperature to control randomness. Must be in the range [0.0, 1.0].
+ Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter for controlling randomness. Must be in the range [0.0, 1.0]. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens to generate in the output. Default is 4095.
+ - "num_samples" : int, optional
+ The number of output samples to generate. Default is 10.
+ - "output_format" : str, optional
+ The format of the output. Default is "alpaca".
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated dialogue outputs.
+
+ Notes
+ -----
+ - This method uses the `generate` method from the `dialogue_generator` to produce dialogue outputs
+ based on the provided configuration.
+ - The `tConfig` dictionary allows for flexible generation based on task type, system/user prompts,
+ and various sampling strategies.
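+
+        Examples
+        --------
+        A sketch of a typical dialogue-generation configuration, assuming ``generator`` is a
+        DataGenerator instance (prompt texts below are illustrative):
+
+        >>> tConfig = {
+        ...     "language": "zh",
+        ...     "task_type": "dialogue",
+        ...     "system_prompt": "You are a helpful assistant.",
+        ...     "user_prompt": "Generate a short dialogue about daily life.",
+        ...     "num_samples": 5,
+        ...     "output_format": "alpaca",
+        ... }
+        >>> data = generator.generate_dialogue(tConfig)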
+ """
+
+ tConfig["user_prompt"] = self.preprocessor.dialogue_preprocess(tConfig["language"], tConfig["user_prompt"])
+ data = self.dialogue_generator.generate(tConfig)
+ return data
\ No newline at end of file
diff --git a/build/lib/edg4llm/core/interface.py b/build/lib/edg4llm/core/interface.py
new file mode 100644
index 0000000..64fe88d
--- /dev/null
+++ b/build/lib/edg4llm/core/interface.py
@@ -0,0 +1,333 @@
+"""
+EDG4LLM: A Comprehensive Interface for Text Generation with Configurable LLMs
+
+Overview
+--------
+The EDG4LLM class serves as a high-level interface for generating text using a language model pipeline.
+It supports configuration for task types, prompts, sampling strategies, and output formats, making it versatile
+and adaptable to various use cases.
+
+Key Features
+------------
+- Task Flexibility: Supports task types such as 'dialogue', 'question', and 'answer'.
+- Custom Prompts: Allows system-level and user-level prompts to guide the generation process.
+- Sampling Controls: Provides options to customize randomness and diversity of outputs using
+ parameters like `do_sample`, `temperature`, and `top_p`.
+- Output Formats: Compatible with customizable output formats, such as "alpaca".
+"""
+
+
+
+import os
+from typing import Any, Tuple, Dict
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.core.pipeline import DataPipeline
+
+logger = custom_logger("interface")
+
+
+class EDG4LLM:
+ """
+ EDG4LLM: A Class for Configurable Text Generation with LLMs
+
+ This class provides an interface for generating text using a configurable language model pipeline.
+ It allows users to specify a variety of parameters, including model type, prompts, sampling strategies,
+ and output formats.
+
+ Attributes
+ ----------
+ pipeline : DataPipeline
+ An instance of the `DataPipeline` class, used to handle the data processing
+ and interaction with the language model.
+
+ Methods
+ -------
+ __init__(model_provider: str = "chatglm", model_name: str = "chatglm-4-flash", base_url: str = None, api_key: str = None):
+ Initializes the EDG4LLM instance with the model type, base URL, and API key.
+
+ generate(task_type: str = 'dialogue', system_prompt: str = None, user_prompt: str = None,
+ do_sample: bool = True, temperature: float = 0.95, top_p: float = 0.7,
+ max_tokens: int = 4095, num_samples: int = 10, output_format: str = "alpaca") -> List[Dict]:
+ Generates text data based on the provided configuration.
+
+ Notes
+ -----
+ - This class leverages the `DataPipeline` for all interactions with the language model.
+ - The `generate` method is user-facing.
+ - Supports customization for tasks like 'dialogue', 'question', and 'answer'.
+ - Ensures compatibility with different output formats (e.g., "alpaca").
+
+ Examples
+ --------
+ >>> # Create an instance of EDG4LLM
+ >>> generator = EDG4LLM(model_provider="chatglm", model_name="chatglm-4-flash", base_url="https://api.example.com", api_key="your_api_key")
+
+ >>> # Generate a dialogue response
+ >>> response = generator.generate(
+ task_type="answer",
+ system_prompt="You are a helpful assistant.",
+ user_prompt="What is the weather today?",
+ max_tokens=100
+ )
+
+ >>> print(response)
+ Output: [{'output': 'The weather today is sunny with a high of 25°C.'}]
+ """
+ def __init__(self,
+ model_provider: str = "chatglm",
+ model_name: str = "chatglm-4-flash",
+ base_url: str = None,
+ api_key: str = None):
+ """
+ Initialize the EDG4LLM instance with the necessary parameters.
+
+ Parameters
+ ----------
+ model_provider: str, optional
+ The type of language model to use, by default "chatglm".
+ model_name : str, optional
+ The specific model to use within the model type, by default "chatglm-4-flash".
+ base_url : str, optional
+ The base URL of the LLM API, by default None.
+ api_key : str, optional
+ The API key for authenticating requests, by default None.
+ """
+
+        self._pConfig = {
+            "model_provider": model_provider,
+            "model_name": model_name,
+            "base_url": base_url,
+            "api_key": api_key,
+        }
+
+ self.pipeline = DataPipeline(self._pConfig)
+ logger.info("DataPipeline initialized successfully with the provided configuration.")
+
+    def generate(self,
+                 language: str = 'zh',
+                 task_type: str = 'dialogue',
+                 system_prompt: str = None,
+                 user_prompt: str = None,
+                 do_sample: bool = True,
+                 temperature: float = 0.95,
+                 top_p: float = 0.7,
+                 max_tokens: int = 4095,
+                 num_samples: int = 10,
+                 output_format: str = "alpaca",
+                 question_path: str = None
+                 ):
+ """
+ Generate text data based on the specified configuration.
+
+ Parameters
+ ----------
+ language : str, optional
+            The language of the generated data. Must be one of 'zh' or 'en'.
+ Default is 'zh'.
+
+ task_type : str, optional
+ The type of task for data generation. Must be one of 'question', 'answer', or 'dialogue'.
+ Default is 'dialogue'.
+
+ system_prompt : str, optional
+ A system-level prompt to guide the text generation.
+ Default is None.
+
+ user_prompt : str, optional
+ A user-provided prompt to initiate the text generation.
+ Default is None.
+
+ do_sample : bool, optional
+ Whether to use sampling during text generation.
+ - If True, enables sampling strategies like temperature and top_p.
+ - If False, uses deterministic decoding (e.g., greedy decoding), and
+ `temperature` and `top_p` are ignored.
+ Default is True.
+
+ temperature : float, optional
+ Sampling temperature to control randomness.
+ - Must be a positive number in the range [0.0, 1.0].
+ - Higher values produce more diverse outputs, while lower values make
+ the output more focused and deterministic.
+ Default is 0.95.
+
+ top_p : float, optional
+ Nucleus sampling parameter for controlling randomness.
+ - Limits token selection to the top cumulative probability range
+ defined by p.
+ - Must be in the range [0.0, 1.0].
+ Default is 0.7.
+
+ max_tokens : int, optional
+ The maximum number of tokens to generate in the output.
+ - Default: 4095.
+ - Maximum allowed value: 4095 (values exceeding this will be capped).
+
+ num_samples : int, optional
+ The number of output samples to generate.
+ Default is 10.
+
+ output_format : str, optional
+ The format of the output.
+ Default is "alpaca".
+
+ question_path : str, optional
+ The path to a file containing a list of questions.
+ - Only applicable when `task_type` is set to 'answer'.
+ - The model will read the file and generate answers for each question in the file.
+ - The output will be returned in a specific format as defined by the `output_format` parameter.
+ Default is None.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated outputs.
+
+ Examples
+ --------
+ >>> # Create an instance of EDG4LLM
+ >>> generator = EDG4LLM(model_provider="chatglm", model_name="chatglm-4-flash", base_url="https://api.example.com", api_key="your_api_key")
+
+ >>> # Generate a dialogue response
+ >>> response = generator.generate(
+ task_type="answer",
+ system_prompt="You are a helpful assistant.",
+ user_prompt="What is the weather today?",
+ max_tokens=100
+ )
+
+ >>> print(response)
+ Output: [{'output': 'The weather today is sunny with a high of 25°C.'}]
+
+ Notes
+ -----
+ The method will use a pipeline's `generate_data` function to create outputs
+ based on the provided configuration.
+ """
+
+ data = self._generate(language, task_type, system_prompt, user_prompt, do_sample, temperature, top_p, max_tokens, num_samples, output_format, question_path)
+ logger.info("Data generation completed successfully for task_type: %s", task_type)
+
+ return data
+
+ def _generate(self,
+ language: str = 'zh',
+ task_type: str = 'dialogue',
+ system_prompt: str = None,
+ user_prompt: str = None,
+ do_sample: bool = True,
+ temperature: float = 0.95,
+ top_p: float = 0.7,
+ max_tokens: int = 4095,
+ num_samples: int = 10,
+ output_format: str = "alpaca",
+ question_path: str = None
+ ):
+ """
+ Generate text data based on the specified configuration.
+
+ Parameters
+ ----------
+ language : str, optional
+            The language of the generated data. Must be one of 'zh' or 'en'.
+ Default is 'zh'.
+
+ task_type : str, optional
+ The type of task for data generation. Must be one of 'question', 'answer', or 'dialogue'.
+ Default is 'dialogue'.
+
+ system_prompt : str, optional
+ A system-level prompt to guide the text generation.
+ Default is None.
+
+ user_prompt : str, optional
+ A user-provided prompt to initiate the text generation.
+ Default is None.
+
+ do_sample : bool, optional
+ Whether to use sampling during text generation.
+ - If True, enables sampling strategies like temperature and top_p.
+ - If False, uses deterministic decoding (e.g., greedy decoding), and
+ `temperature` and `top_p` are ignored.
+ Default is True.
+
+ temperature : float, optional
+ Sampling temperature to control randomness.
+ - Must be a positive number in the range [0.0, 1.0].
+ - Higher values produce more diverse outputs, while lower values make
+ the output more focused and deterministic.
+ Default is 0.95.
+
+ top_p : float, optional
+ Nucleus sampling parameter for controlling randomness.
+ - Limits token selection to the top cumulative probability range
+ defined by p.
+ - Must be in the range [0.0, 1.0].
+ Default is 0.7.
+
+ max_tokens : int, optional
+ The maximum number of tokens to generate in the output.
+ - Default: 4095.
+ - Maximum allowed value: 4095 (values exceeding this will be capped).
+
+ num_samples : int, optional
+ The number of output samples to generate.
+ Default is 10.
+
+ output_format : str, optional
+ The format of the output.
+ Default is "alpaca".
+
+ question_path : str, optional
+ The path to a file containing a list of questions.
+ - Only applicable when `task_type` is set to 'answer'.
+ - The model will read the file and generate answers for each question in the file.
+ - The output will be returned in a specific format as defined by the `output_format` parameter.
+ Default is None.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated outputs.
+
+ Examples
+ --------
+ >>> # Create an instance of EDG4LLM
+ >>> generator = EDG4LLM(model_provider="chatglm", model_name="chatglm-4-flash", base_url="https://api.example.com", api_key="your_api_key")
+
+ >>> # Generate a dialogue response
+ >>> response = generator.generate(
+ task_type="answer",
+ system_prompt="You are a helpful assistant.",
+ user_prompt="What is the weather today?",
+ max_tokens=100
+ )
+
+ >>> print(response)
+ Output: [{'output': 'The weather today is sunny with a high of 25°C.'}]
+
+ Notes
+ -----
+ The method will use a pipeline's `generate_data` function to create outputs
+ based on the provided configuration.
+ """
+
+ self._tConfig = {
+ "language": language,
+ "task_type": task_type, # The type of task for data generation
+ "system_prompt": system_prompt, # The system-level prompt
+ "user_prompt": user_prompt, # The user-provided prompt
+ "do_sample": do_sample, # Whether to use sampling
+ "temperature": temperature, # Sampling temperature
+ "top_p": top_p, # Nucleus sampling parameter
+ "max_tokens": max_tokens, # Maximum tokens in the output
+ "num_samples": num_samples, # Number of output samples
+ "output_format": output_format, # Desired output format
+ "question_path": question_path
+ }
+
+ # Call the pipeline's generate_data method using the configuration dictionary
+ data = self.pipeline.generate_data(self._tConfig)
+
+ return data
diff --git a/build/lib/edg4llm/core/pipeline.py b/build/lib/edg4llm/core/pipeline.py
new file mode 100644
index 0000000..d8ba770
--- /dev/null
+++ b/build/lib/edg4llm/core/pipeline.py
@@ -0,0 +1,88 @@
+import os
+from typing import Any, Tuple, Dict
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.core.dataGenerators import DataGenerator
+
+logger = custom_logger("DataPipeline")
+
+class DataPipeline:
+ """
+ The DataPipeline class manages the entire process of generating data, designed to
+ automatically create fine-tuning data for different task types such as question
+ generation, answer generation, and dialogue generation.
+
+ This class uses a DataGenerator object to handle the core logic of data generation
+ and dynamically executes the corresponding task based on the provided configuration
+ parameters. It provides a unified interface for users to easily invoke specific
+ data generation methods with minimal configuration.
+
+    Attributes
+    ----------
+    data_generator : DataGenerator
+        An object that handles the specific data generation tasks.
+
+    Methods
+    -------
+    __init__(pConfig)
+        Initializes the DataPipeline class and creates a DataGenerator object based on
+        the configuration.
+    generate_data(tConfig)
+        Generates fine-tuning data based on the task configuration. Supported task types
+        include question generation, answer generation, and dialogue generation.
+ """
+
+ def __init__(self, pConfig):
+ """
+ Initializes the data generation process.
+
+ Parameters
+ ----------
+ pConfig : dict
+ Configuration for initializing the DataGenerator. Expected to contain:
+ - model_provider: str
+ The type of language model to use, by default "chatglm".
+ - model_name: str
+ The specific model to use within the model type, by default "chatglm-4-flash".
+ - base_url : str
+ The base URL of the LLM API.
+ - api_key : str
+ The API key for authentication.
+ """
+
+ self.data_generator = DataGenerator(pConfig)
+
+ def generate_data(self, tConfig) -> Dict:
+ """
+ Generates data based on the provided configuration.
+
+ Parameters
+ ----------
+ tConfig : Dict
+ Task configuration containing the following keys:
+ - task_type : str
+ Specifies the type of task ('question', 'answer', or 'dialogue').
+ - Other parameters required for data generation, specific to the task type.
+
+ Returns
+ -------
+ dict
+ A dictionary containing the generated fine-tuning data.
+
+ Raises
+ ------
+ ValueError
+ If the provided task type is unsupported.
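+
+        Examples
+        --------
+        A minimal sketch, assuming `pConfig` and `tConfig` are dictionaries built as
+        described in `DataGenerator`:
+
+        >>> pipeline = DataPipeline(pConfig)
+        >>> data = pipeline.generate_data(tConfig)  # tConfig["task_type"] selects the generator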
+ """
+ if tConfig["task_type"] == "question":
+            logger.info("Generating data for task_type: 'question'")
+ data = self.data_generator.generate_question(tConfig)
+ elif tConfig["task_type"] == "answer":
+            logger.info("Generating data for task_type: 'answer'")
+ data = self.data_generator.generate_answer(tConfig)
+ elif tConfig["task_type"] == "dialogue":
+            logger.info("Generating data for task_type: 'dialogue'")
+ data = self.data_generator.generate_dialogue(tConfig)
+ else:
+ logger.error("Unsupported task type: %s", tConfig["task_type"])
+ raise ValueError("Unsupported task type")
+
+ return data
diff --git a/build/lib/edg4llm/generators/__init__.py b/build/lib/edg4llm/generators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/edg4llm/generators/text_generators/__init__.py b/build/lib/edg4llm/generators/text_generators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/edg4llm/generators/text_generators/answer_generator.py b/build/lib/edg4llm/generators/text_generators/answer_generator.py
new file mode 100644
index 0000000..a66d0c2
--- /dev/null
+++ b/build/lib/edg4llm/generators/text_generators/answer_generator.py
@@ -0,0 +1,191 @@
+import os
+import sys
+import json
+from typing import Dict, List, Any
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.generators.text_generators.base_generator import BaseGenerator
+
+logger = custom_logger("AnswerGenerator")
+
+class AnswerGenerator(BaseGenerator):
+ """
+ A class for generating answers based on user queries using a specified model.
+
+ This class extends the `BaseGenerator` class and provides functionality to generate
+ answers to user queries based on a given configuration. It interacts with the model's
+ `execute_request` method to generate responses based on system-level and user-level prompts.
+ It supports customization through parameters such as temperature, sampling strategies,
+ and token limits.
+
+ Attributes
+ ----------
+ model : object
+ The model interface used for generating answers.
+
+ Methods
+ -------
+ generate(tConfig: dict) -> list of dict:
+ Generates answers based on the provided configuration.
+
+ Notes
+ -----
+ - The `generate` method ensures valid answers are returned, retrying if necessary.
+ - It logs progress for each generated answer.
+ """
+
+ def __init__(self, model):
+ """
+ Initialize the AnswerGenerator.
+
+ Parameters
+ ----------
+ model : object
+ The model interface used for generating answers.
+ """
+
+ super().__init__(model)
+
+    def generate(self, tConfig) -> List[Dict]:
+ """
+ Generate answers based on the provided configuration.
+
+ This method generates one or more answers based on the parameters provided in
+ the `tConfig` dictionary. It uses the model's `execute_request` method to generate
+ answers based on the system and user prompts, with options to control randomness,
+ output length, and sampling strategy.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "system_prompt" : str, optional
+ A system-level prompt that provides context for generating the answer. Default is an empty string.
+ - "user_prompt" : str
+ A user-provided prompt (query) to generate the corresponding answer.
+ - "model" : str, optional
+ The specific model to use for answer generation. Default is "glm-4-flash".
+ - "do_sample" : bool, optional
+ Whether to use sampling strategies during answer generation. Default is True.
+ - "temperature" : float, optional
+ A sampling parameter to control the randomness of the output. Must be between 0.0 and 1.0. Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter controlling the cumulative probability range for token selection.
+ Must be between 0.0 and 1.0. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens to generate in the answer. Default is 4095.
+ - "num_samples" : int, optional
+ The number of answers to generate. Default is 1.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated answers. Each dictionary
+ includes the generated answer content and relevant metadata.
+
+ Notes
+ -----
+ - The method will retry generating answers if the model fails to provide a valid response.
+ - Progress and debug information are logged for each generated answer.
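+
+        Examples
+        --------
+        The file referenced by "question_path" should contain either a single object or a
+        list of objects, each with a "question" field, for example (contents illustrative):
+
+            [
+                {"question": "What is overfitting?"},
+                {"question": "Explain gradient descent."}
+            ]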
+ """
+
+ # Extract configuration parameters
+ system_prompt = tConfig.get("system_prompt", "")
+ user_prompt = tConfig.get("user_prompt", "")
+ do_sample = tConfig.get("do_sample", True)
+ temperature = tConfig.get("temperature", 0.95)
+ top_p = tConfig.get("top_p", 0.7)
+ max_tokens = tConfig.get("max_tokens", 4095)
+ num_samples = tConfig.get("num_samples", 1) # Default is to generate 1 sample
+ question_path = tConfig.get("question_path", None)
+
+ try:
+ with open(question_path, "r", encoding="utf-8") as file:
+ data = json.load(file)
+
+ if isinstance(data, dict): # If it's a single dictionary, wrap it in a list
+ data = [data]
+ elif not isinstance(data, list): # Ensure it's a list of dictionaries
+ raise ValueError("Invalid JSON structure. Expected a list or a dictionary.")
+
+ # Extract questions
+ questions = [item["question"] for item in data if "question" in item]
+ except FileNotFoundError:
+ logger.error("The file at path %s was not found.", question_path)
+ return None
+ except json.JSONDecodeError as e:
+ logger.error("Error decoding JSON from file %s: %s", question_path, str(e))
+ return None
+ except Exception as e:
+ logger.error("Unexpected error: %s", str(e))
+ return None
+
+ if len(questions) != num_samples:
+ logger.error(
+ "The number of questions (%d) does not match the expected number (%d). Please check your input.",
+ len(questions),
+ num_samples,
+ )
+
+            sys.exit(1)  # Exit with a non-zero status to signal abnormal termination
+
+ # List to store the generated dialogues
+ dialogues = []
+
+ # Generate dialogues for the specified number of samples
+ total_samples = num_samples # Total number of samples to generate
+ logger.info("Starting the data generation process.")
+ for _idx, question in enumerate(questions):
+            retry_count = 0  # Initialize the retry counter
+            max_retries = 5  # Maximum number of retries (adjust as needed)
+
+ while True: # Keep trying until valid dialogue data is generated
+ retry_count += 1
+
+ generated_answer = self.model.execute_request(
+ system_prompt=system_prompt,
+ user_prompt=user_prompt.replace("EDG4LLM", question),
+ do_sample=do_sample,
+ temperature=temperature,
+ top_p=top_p,
+ max_tokens=max_tokens,
+ )
+
+ if "error" in generated_answer:
+ logger.warning(
+ "Sample %d: Request failed with error: %s. Retrying (%d/%d)...",
+ _idx + 1,
+ generated_answer["error"],
+ retry_count,
+ max_retries,
+ )
+
+ if retry_count >= max_retries:
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx + 1)
+                        break  # Skip this sample and move on to the next one
+                    continue  # Retry generation for the current sample
+
+ # Convert the generated dialogue to the desired format (e.g., Alpaca format)
+ converted_generated_answer = self._convert_original_to_alpaca_answer(system_prompt, question, generated_answer)
+
+ if converted_generated_answer is not None:
+ # If the dialogue is valid, append it to the results and break the loop
+ dialogues.append(converted_generated_answer)
+ break
+ else:
+ logger.warning(
+ "Sample %d: Generated answer is None. Retrying (%d/%d)...",
+ _idx + 1,
+ retry_count,
+ max_retries,
+ )
+
+ if retry_count >= max_retries:
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx + 1)
+                        break  # Skip this sample
+
+ # Log the progress of dialogue generation
+ progress = ((_idx+1) / total_samples) * 100
+ logger.info("Data generation progress: %.2f%% (%d/%d samples completed)", progress, _idx+1, total_samples)
+
+ return dialogues
diff --git a/build/lib/edg4llm/generators/text_generators/base_generator.py b/build/lib/edg4llm/generators/text_generators/base_generator.py
new file mode 100644
index 0000000..a857635
--- /dev/null
+++ b/build/lib/edg4llm/generators/text_generators/base_generator.py
@@ -0,0 +1,131 @@
+import os
+from abc import ABC, abstractmethod
+from typing import Dict
+
+from edg4llm.processor.postprocess import PostProcessor
+
+
+class BaseGenerator(ABC):
+ """
+ Base class for all data generators, defining a common interface for generating data.
+
+ This class serves as a foundation for different types of data generators, providing common functionality
+ such as interaction with a model and post-processing of generated data. Specific generators should extend
+ this class and implement their own `generate` method.
+
+ Attributes
+ ----------
+ model : object
+ The model interface used for generating data.
+ postprocessor : PostProcessor
+ An instance of the PostProcessor class for handling post-processing of generated data.
+
+ Methods
+ -------
+ generate(prompt: str) -> str
+ Abstract method to generate data based on a prompt. Must be implemented by subclasses.
+
+ """
+ def __init__(self, model):
+ """
+ Initialize the generator.
+
+ Parameters
+ ----------
+ model : object
+ The model interface used for generating data.
+ """
+
+ self.model = model
+ self.postprocessor = PostProcessor()
+
+ @abstractmethod
+ def generate(self, prompt: str) -> str:
+ """
+        Generate data based on the provided prompt or configuration.
+
+        This abstract method must be implemented by subclasses. Concrete generators
+        (e.g., AnswerGenerator, QuestionGenerator, DialogueGenerator) use it to drive
+        the actual data generation and return the generated output.
+
+        Parameters
+        ----------
+        prompt : str
+            The prompt or configuration used to drive data generation.
+
+        Returns
+        -------
+        str
+            The generated data.
+ """
+ pass
+
+ def _convert_original_to_alpaca(self, system_prompt, single_data):
+ """
+ Convert original data into Alpaca format.
+
+ This method uses the PostProcessor to process conversation data and structure it
+ in a format suitable for Alpaca-based models.
+
+ Parameters
+ ----------
+ system_prompt : str
+ The system-level prompt for context in the Alpaca format.
+ single_data : str
+ The raw conversation data to be processed.
+
+ Returns
+ -------
+ dict
+ The conversation data converted to Alpaca format.
+ """
+
+ converted_data = self.postprocessor.dialogue_postprocessing(conversation_data=single_data, system_prompt=system_prompt)
+
+ return converted_data
+
+ def _convert_original_to_json(self, single_data):
+ """
+ Convert original data into JSON format.
+
+ This method uses the PostProcessor to process raw data into a JSON-compatible structure.
+
+ Parameters
+ ----------
+ single_data : str
+ The raw question data to be processed.
+
+ Returns
+ -------
+ dict
+ The data converted into JSON format.
+ """
+
+ converted_data = self.postprocessor.question_postprocessing(question_data=single_data)
+
+ return converted_data
+
+ def _convert_original_to_alpaca_answer(self, system_prompt, question, single_data):
+ """
+ Convert original data into Alpaca answer format.
+
+ This method uses the PostProcessor to process raw data into an answer format suitable for Alpaca-based models.
+
+ Parameters
+ ----------
+ system_prompt : str
+ The system-level prompt for context in the Alpaca format.
+ question : str
+ The question text for which the answer is generated.
+ single_data : str
+ The raw answer data to be processed.
+
+ Returns
+ -------
+ dict
+ The data converted into Alpaca format.
+ """
+
+ converted_data = self.postprocessor.answer_postprocessing(question=question, answer=single_data, system_prompt=system_prompt)
+
+ return converted_data
+
\ No newline at end of file
diff --git a/build/lib/edg4llm/generators/text_generators/dialogue_generator.py b/build/lib/edg4llm/generators/text_generators/dialogue_generator.py
new file mode 100644
index 0000000..e1a9e71
--- /dev/null
+++ b/build/lib/edg4llm/generators/text_generators/dialogue_generator.py
@@ -0,0 +1,159 @@
+import os
+from typing import Dict, List, Any
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.generators.text_generators.base_generator import BaseGenerator
+
+logger = custom_logger("DialogueGenerator")
+
+class DialogueGenerator(BaseGenerator):
+ """
+ Dialogue Generator class for generating dialogues using a specified model.
+
+ This class extends the `BaseGenerator` and utilizes the given model to generate dialogues
+ based on user input and system prompts. It provides flexibility to control generation parameters
+ like sampling strategies, temperature, and output format.
+
+ Parameters
+ ----------
+ model : object
+ The model interface used for generating dialogues. This model must have the
+ `execute_request` method for generating dialogue based on the given parameters.
+ """
+
+ def __init__(self, model):
+ """
+ Initialize the Dialogue Generator.
+
+ This constructor initializes the `DialogueGenerator` by calling the base class constructor
+ with the provided model. It sets up the necessary components for generating dialogues.
+
+ Parameters
+ ----------
+ model : object
+ The model interface to be used for generating dialogues. It should provide
+ the `execute_request` method to generate data based on the parameters.
+
+ Notes
+ -----
+ The `model` should be capable of handling inputs like system prompts, user prompts,
+ and additional parameters for controlling the text generation process.
+ """
+ super().__init__(model)
+
+ def generate(self, tConfig) -> List:
+ """
+ Generate dialogues based on the provided configuration.
+
+ This method generates one or more dialogues based on the parameters provided in
+ the `tConfig` dictionary. The method interacts with the model's `execute_request`
+ function to generate dialogue based on the system and user prompts. It also supports
+ various options for controlling randomness, output length, and sampling strategy.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A configuration dictionary containing the following key-value pairs:
+ - "system_prompt" : str, optional
+ A system-level prompt that guides the dialogue generation. Default is an empty string.
+ - "user_prompt" : str, optional
+ A user-provided prompt to initiate the dialogue generation. Default is an empty string.
+ - "model" : str, optional
+ The specific model to use for generation. Default is "glm-4-flash".
+ - "do_sample" : bool, optional
+ Whether to use sampling strategies during text generation. Default is True.
+ - "temperature" : float, optional
+ A sampling parameter to control the randomness of output. Must be between 0.0 and 1.0. Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter controlling the cumulative probability range for token selection.
+ Must be between 0.0 and 1.0. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+ - "num_samples" : int, optional
+ The number of dialogue samples to generate. Default is 1.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated dialogues. Each dictionary
+ includes the generated dialogue content.
+
+ Notes
+ -----
+ - The method will attempt to generate dialogues until a valid response is generated.
+ If the generated dialogue is `None`, it will retry.
+ - Progress is logged for each sample generated.
+ """
+
+ # Extract configuration parameters
+ system_prompt = tConfig.get("system_prompt", "")
+ user_prompt = tConfig.get("user_prompt", "")
+ do_sample = tConfig.get("do_sample", True)
+ temperature = tConfig.get("temperature", 0.95)
+ top_p = tConfig.get("top_p", 0.7)
+ max_tokens = tConfig.get("max_tokens", 4095)
+ num_samples = tConfig.get("num_samples", 1) # Default is to generate 1 sample
+
+ # List to store the generated dialogues
+ dialogues = []
+
+ # Generate dialogues for the specified number of samples
+ total_samples = num_samples # Total number of samples to generate
+ logger.info("Starting the data generation process.")
+ for _idx in range(1, num_samples + 1):
+                retry_count = 0  # Initialize the retry counter
+                max_retries = 5  # Maximum number of retries (adjust as needed)
+
+ while True: # Keep trying until valid dialogue data is generated
+ retry_count += 1
+
+ generated_dialogue = self.model.execute_request(
+ system_prompt=system_prompt,
+ user_prompt=user_prompt,
+ do_sample=do_sample,
+ temperature=temperature,
+ top_p=top_p,
+ max_tokens=max_tokens,
+ )
+
+ if "error" in generated_dialogue:
+ logger.warning(
+ "Sample %d: Request failed with error: %s. Retrying (%d/%d)...",
+ _idx,
+ generated_dialogue["error"],
+ retry_count,
+ max_retries,
+ )
+
+ if retry_count >= max_retries:
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx)
+                        break  # Skip this sample and move on to the next one
+
+                    continue  # Retry generation for the current sample
+
+
+ # Convert the generated dialogue to the desired format (e.g., Alpaca format)
+ converted_generated_dialogue = self._convert_original_to_alpaca(system_prompt, generated_dialogue)
+
+ if converted_generated_dialogue is not None:
+ # If the dialogue is valid, append it to the results and break the loop
+ dialogues.append(converted_generated_dialogue)
+ break
+ else:
+ logger.warning(
+ "Sample %d: Generated dialogue is None. Retrying (%d/%d)...",
+ _idx,
+ retry_count,
+ max_retries,
+ )
+
+ if retry_count >= max_retries:
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx)
+                        break  # Skip this sample
+
+
+ # Log the progress of dialogue generation
+ progress = (_idx / total_samples) * 100
+ logger.info("Data generation progress: %.2f%% (%d/%d samples completed)", progress, _idx, total_samples)
+
+ return dialogues
diff --git a/build/lib/edg4llm/generators/text_generators/question_generator.py b/build/lib/edg4llm/generators/text_generators/question_generator.py
new file mode 100644
index 0000000..3a4d99e
--- /dev/null
+++ b/build/lib/edg4llm/generators/text_generators/question_generator.py
@@ -0,0 +1,151 @@
+import os
+from typing import Dict, List, Any
+from edg4llm.utils.logger import custom_logger
+from edg4llm.generators.text_generators.base_generator import BaseGenerator
+
+logger = custom_logger("QuestionGenerator")
+
+class QuestionGenerator(BaseGenerator):
+ """
+ A class for generating questions based on user prompts and configuration.
+
+ This class extends the `BaseGenerator` class and provides functionality to generate
+ questions using a specified model. It interacts with the model's `execute_request`
+ method to create output based on user-defined parameters such as sampling strategies,
+ temperature, and maximum tokens.
+
+ Attributes
+ ----------
+ model : object
+ The model interface used for generating questions.
+
+ Methods
+ -------
+ generate(tConfig: dict) -> list of dict:
+ Generates questions based on the provided configuration.
+
+ Notes
+ -----
+ - The `generate` method ensures valid responses are returned, retrying if necessary.
+ - Logs progress for each generated question.
+ """
+
+ def __init__(self, model):
+ """
+ Initialize the QuestionGenerator.
+
+ Parameters
+ ----------
+ model : object
+ The model interface used for generating questions.
+ """
+
+ super().__init__(model)
+
+ def generate(self, tConfig: Dict) -> List:
+ """
+ Generate questions based on the provided configuration.
+
+ This method generates one or more questions using the parameters specified
+ in the `tConfig` dictionary. It interacts with the model's `execute_request`
+ method to generate output based on user prompts and various sampling options.
+
+ Parameters
+ ----------
+ tConfig : dict
+ A dictionary containing configuration options for question generation:
+ - "system_prompt" : str, optional
+ A system-level instruction to guide the question generation. Default is an empty string.
+ - "user_prompt" : str, optional
+ A user-provided input to guide the question generation. Default is an empty string.
+ - "model" : str, optional
+ Specifies the model for text generation. Default is "glm-4-flash".
+ - "do_sample" : bool, optional
+ Whether to use sampling during generation. Default is True.
+ - "temperature" : float, optional
+ Controls randomness in output. Value should be between 0.0 and 1.0. Default is 0.95.
+ - "top_p" : float, optional
+ Nucleus sampling parameter to limit token selection to a cumulative probability. Default is 0.7.
+ - "max_tokens" : int, optional
+ The maximum number of tokens for the output. Default is 4095.
+ - "num_samples" : int, optional
+ The number of question samples to generate. Default is 1.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries containing the generated questions.
+
+ Notes
+ -----
+ - The method retries generation until a valid response is obtained.
+ - Logs progress for each generated sample.
+ """
+
+ # Extract parameters from the configuration
+ system_prompt = tConfig.get("system_prompt", "")
+ user_prompt = tConfig.get("user_prompt", "")
+ do_sample = tConfig.get("do_sample", True)
+ temperature = tConfig.get("temperature", 0.95)
+ top_p = tConfig.get("top_p", 0.7)
+ max_tokens = tConfig.get("max_tokens", 4095)
+ num_samples = tConfig.get("num_samples", 1)
+
+ # Initialize a list to store generated questions
+ questions = []
+ cur_len = 0
+ # Generate questions for the specified number of samples
+ logger.info("Starting the data generation process.")
+ for _idx in range(1, num_samples + 1):
+            retry_count = 0  # Initialize the retry counter
+            max_retries = 5  # Maximum number of retries (adjust as needed)
+
+ while True: # Retry until a valid question is generated
+ retry_count += 1
+
+ generated_question = self.model.execute_request(
+ system_prompt=system_prompt,
+ user_prompt=user_prompt,
+ do_sample=do_sample,
+ temperature=temperature,
+ top_p=top_p,
+ max_tokens=max_tokens,
+ )
+
+ if "error" in generated_question:
+ logger.warning(
+ "Sample %d: Request failed with error: %s. Retrying (%d/%d)...",
+ _idx,
+ generated_question["error"],
+ retry_count,
+ max_retries,
+ )
+
+ if (retry_count >= max_retries):
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx)
+                    break  # Skip this sample
+
+ # Convert the raw output to a specific format
+ converted_question = self._convert_original_to_json(generated_question)
+
+ if converted_question is not None:
+ cur_len = len(converted_question)
+ questions.extend(converted_question)
+ break
+ else:
+ logger.warning(
+                    "Sample %d: Generated question is None. Retrying (%d/%d)...",
+ _idx,
+ retry_count,
+ max_retries,
+ )
+
+ if retry_count >= max_retries:
+ logger.error("Sample %d: Max retries reached. Skipping this sample.", _idx)
+                    break  # Skip this sample
+
+ # Log progress for tracking generation completion
+ progress = (_idx / num_samples) * 100
+            logger.info("Generation progress: %.2f%% (%d questions generated in this batch, %d/%d samples completed)", progress, cur_len, _idx, num_samples)
+
+ return questions
diff --git a/build/lib/edg4llm/models/__init__.py b/build/lib/edg4llm/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/edg4llm/models/baseModel.py b/build/lib/edg4llm/models/baseModel.py
new file mode 100644
index 0000000..d3c62b8
--- /dev/null
+++ b/build/lib/edg4llm/models/baseModel.py
@@ -0,0 +1,126 @@
+"""
+Module for defining the base class of EDG models.
+
+This file contains the abstract base class `EDGBaseModel`, which serves as a foundation for implementing various
+machine learning models. The class defines key methods that must be implemented by any derived model class
+to handle requests, send HTTP requests, and interact with APIs.
+
+Classes
+-------
+EDGBaseModel(ABC)
+ Abstract base class for EDG models, providing a standard structure for derived model implementations.
+
+Methods
+-------
+__init__(api_key: str = None, base_url: str = None, model_name: str = None)
+ Initializes the base model with API key, base URL, and model name.
+
+execute_request(system_prompt: str, user_prompt: str, **kwargs) -> str
+ Abstract method to process user input and generate model responses.
+ Must be implemented by derived classes.
+
+send_request(request: Dict[str, Any]) -> Dict[str, Any]
+ Abstract method to send HTTP requests and handle server interactions.
+ Must be implemented by derived classes.
+"""
+
+import requests
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+from edg4llm.utils.logger import custom_logger
+
+logger = custom_logger('baseModel')
+
+
+class EDGBaseModel(ABC):
+ """
+ Abstract base class for EDG models.
+
+ This class defines the blueprint for machine learning model implementations. Derived classes must
+ implement methods to process user prompts, interact with APIs, and handle HTTP requests.
+
+ Attributes
+ ----------
+ api_key : str
+ The API key required for authenticating requests.
+
+ base_url : str
+ The base URL of the model API endpoint.
+
+ model_name : str
+ The name of the model, used to differentiate between various models.
+ """
+
+ def __init__(self, api_key: str = None, base_url: str = None, model_name: str = None):
+ """
+ Initializes the base model with API key, base URL, and model name.
+
+ Parameters
+ ----------
+ api_key : str, optional
+ The API key for authenticating requests. Default is None.
+
+ base_url : str, optional
+ The base URL of the model API endpoint. Default is None.
+
+ model_name : str, optional
+ The name of the model, used for identifying different models. Default is None.
+ """
+ self.api_key = api_key
+ self.base_url = base_url
+ self.model_name = model_name
+
+ @abstractmethod
+ def execute_request(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
+ """
+ Abstract method to process and execute a request.
+
+ This method must be implemented by derived classes. It processes user input and generates
+ responses based on a system prompt and additional parameters.
+
+ Parameters
+ ----------
+ system_prompt : str
+ The system-level instruction or prompt defining the role or behavior of the model.
+
+ user_prompt : str
+ The user's input or query for the model.
+
+ kwargs : dict
+ Additional parameters for processing the request.
+
+ Returns
+ -------
+ str
+ The response generated by the model.
+
+ Notes
+ -----
+ - Derived classes should implement this method to handle the specific logic for generating responses.
+ """
+ pass
+
+ @abstractmethod
+ def send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Abstract method to send HTTP requests.
+
+ This method must be implemented by derived classes to handle API interactions and perform
+ error handling for HTTP requests.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing all necessary information for the HTTP request.
+
+ Returns
+ -------
+ dict
+ The server's response as a dictionary.
+
+ Notes
+ -----
+ - Derived classes should implement this method to handle API-specific logic and error handling.
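+
+        Examples
+        --------
+        A rough sketch of the expected request dictionary (field values are placeholders;
+        see the concrete model classes for the exact payload they build):
+
+        >>> request = {
+        ...     "url": "https://api.example.com/chat/completions",
+        ...     "headers": {"Authorization": "Bearer your_api_key",
+        ...                 "Content-Type": "application/json"},
+        ...     "json": {"model": "glm-4-flash",
+        ...              "messages": [{"role": "user", "content": "Hello"}]},
+        ... }
+        >>> response = model.send_request(request)  # "model" is a concrete subclass instance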
+ """
+ pass
diff --git a/build/lib/edg4llm/models/chatglm.py b/build/lib/edg4llm/models/chatglm.py
new file mode 100644
index 0000000..5c99629
--- /dev/null
+++ b/build/lib/edg4llm/models/chatglm.py
@@ -0,0 +1,273 @@
+import os
+import requests
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union, cast
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.models.baseModel import EDGBaseModel
+from edg4llm.utils.exceptions import HttpClientError, InvalidPromptError
+
+logger = custom_logger('chatglm')
+
+class EDGChatGLM(EDGBaseModel):
+ """
+ EDGChatGLM interface for interacting with the ChatGLM model to generate text based on given prompts.
+
+ This class provides an interface to interact with the ChatGLM model for generating text
+ based on a system and user prompt. It supports customizable parameters such as temperature,
+ sampling strategies, and model selection. It also handles HTTP requests and error management.
+
+ Parameters
+ ----------
+ base_url : str, optional
+ The base URL for the ChatGLM API. If not provided, defaults to None.
+ api_key : str, optional
+ The API key for authenticating with the ChatGLM API. If not provided, defaults to None.
+ """
+
+ def __init__(self, base_url: str = None, api_key: str = None, model_name: str = 'glm-4-flash'):
+ """
+ Initialize the ChatGLM model interface.
+
+ This constructor initializes the `EDGChatGLM` class by calling the base class constructor
+ and passing the API key, base URL, and model name ("ChatGLM"). It sets up the necessary
+ configuration for interacting with the ChatGLM API.
+
+ Parameters
+ ----------
+ base_url : str, optional
+ The base URL for the ChatGLM API. Default is None.
+ api_key : str, optional
+ The API key for authenticating with the ChatGLM API. Default is None.
+        model_name : str, optional
+            The specific model to use within the selected provider. Default is "glm-4-flash".
+
+        Notes
+ -----
+ The base URL and API key are required for successful communication with the ChatGLM API.
+ """
+ super().__init__(api_key, base_url, model_name=model_name)
+
+ def execute_request(
+ self,
+ system_prompt: str = None,
+ user_prompt: str = None,
+ do_sample: bool = True,
+ temperature: float = 0.95,
+ top_p: float = 0.7,
+ max_tokens: int = 4095
+ ) -> str:
+ """
+ Generate text using the ChatGLM model based on the provided prompts and parameters.
+
+ This method calls the internal request execution function and handles the text
+ generation process using the specified system and user prompts. It allows controlling
+ text generation via parameters such as temperature, sampling strategy, and token limits.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt that sets the context for the conversation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt that initiates the conversation. Default is None.
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter for controlling randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate in the output. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both the system and user prompts are None.
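+
+        Examples
+        --------
+        A minimal, illustrative call (the URL and key are placeholders):
+
+        >>> model = EDGChatGLM(
+        ...     base_url="https://open.bigmodel.cn/api/paas/v4/chat/completions",
+        ...     api_key="your_api_key",
+        ... )
+        >>> text = model.execute_request(
+        ...     system_prompt="You are a helpful assistant.",
+        ...     user_prompt="What is the weather today?",
+        ...     max_tokens=100,
+        ... )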
+ """
+ response = self._execute_request(system_prompt, user_prompt, self.model_name, do_sample, temperature, top_p, max_tokens)
+ return response
+
+ def send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Send an HTTP request to the ChatGLM API.
+
+ This method sends a POST request to the ChatGLM API with the provided request data.
+ It returns the response data as a dictionary.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+        str or dict
+            The generated text content extracted from the API response on success,
+            or a dictionary describing the error if the request fails.
+
+ Raises
+ ------
+ HttpClientError
+ If any error occurs during the HTTP request process.
+ """
+ response = self._send_request(request=request)
+ return response
+
+ def _send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Internal method to send a POST request to the ChatGLM API.
+
+ This method handles the actual HTTP POST request to the ChatGLM API. It includes
+ error handling for HTTP errors, connection issues, timeouts, and JSON decoding.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+        str or dict
+            The generated text content extracted from the API response on success,
+            or a dictionary describing the error if the request fails.
+
+ Raises
+ ------
+ HttpClientError
+ If an error occurs during the request.
+ """
+ url = request.get("url", "https://open.bigmodel.cn/api/paas/v4/chat/completions")
+ headers = {**request.get("headers", {})}
+ json = request.get("json", {})
+ try:
+ response = requests.post(
+ url=url,
+ headers=headers,
+ json=json,
+ timeout=30,
+ )
+ response.raise_for_status()
+ return response.json()["choices"][0]["message"]["content"].strip()
+
+ except requests.exceptions.HTTPError as e:
+ # Handle HTTP error exceptions
+ status_code = e.response.status_code
+ logger.error(
+ "HTTP error occurred. Status Code: %s, URL: %s, Message: %s",
+ status_code,
+ url,
+ e,
+ )
+
+ return {"error": "HTTP error", "status_code": status_code, "message": str(e)}
+
+
+ except requests.exceptions.ConnectionError as e:
+ # Handle connection errors
+ logger.error("Connection error occurred while connecting to %s: %s", url, e)
+
+ return {"error": "Connection error", "message": str(e)}
+
+ except requests.exceptions.Timeout as e:
+ # Handle timeout errors
+ logger.error("Timeout occurred while sending request to %s: %s", url, e)
+
+ return {"error": "Timeout", "message": str(e)}
+
+
+ except requests.exceptions.RequestException as e:
+ # Handle any generic request exceptions
+ logger.error(
+ "Request exception occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Request exception", "message": str(e)}
+
+
+ except ValueError as e:
+ # Handle JSON decoding errors
+ logger.error("JSON decoding error occurred: %s", e)
+
+ return {"error": "JSON decoding error", "message": str(e)}
+
+ except Exception as e:
+ # Catch any unexpected errors
+ logger.critical(
+ "An unexpected error occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Unexpected error", "message": str(e)}
+
+ def _execute_request(
+ self,
+ system_prompt: str = None,
+ user_prompt: str = None,
+ model: str = "glm-4-flash",
+ do_sample: bool = True,
+ temperature: float = 0.95,
+ top_p: float = 0.7,
+ max_tokens: int = 4095
+ ) -> str:
+ """
+ Internal method to prepare the request data and execute the request for text generation.
+
+ This method prepares the necessary data (including headers, JSON body) for the
+ ChatGLM API request and then calls the `send_request` method to send the request
+ and return the response.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt that provides context for the dialogue generation.
+ Default is None.
+ user_prompt : str, optional
+ The user-provided prompt that initiates the generation.
+ Default is None.
+ model : str, optional
+ The model to use for the generation. Default is "glm-4-flash".
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter for controlling randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both the system and user prompts are None.
+ """
+ if (system_prompt is None and user_prompt is None):
+ logger.error("Both prompts cannot be empty")
+ raise InvalidPromptError("Both prompts cannot be empty")
+
+ request_data = {
+ "url": f"{self.base_url}",
+ "headers": {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ },
+ "json": {
+ "model": model,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ],
+ "do_sample": do_sample,
+ "temperature": temperature,
+ "top_p": top_p,
+ "max_tokens": max_tokens,
+ },
+ }
+
+ response = self.send_request(request_data)
+
+ return response
diff --git a/build/lib/edg4llm/models/chatgpt.py b/build/lib/edg4llm/models/chatgpt.py
new file mode 100644
index 0000000..6b7ad18
--- /dev/null
+++ b/build/lib/edg4llm/models/chatgpt.py
@@ -0,0 +1,286 @@
+import os
+import requests
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union, cast
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.models.baseModel import EDGBaseModel
+from edg4llm.utils.exceptions import HttpClientError, InvalidPromptError
+
+logger = custom_logger('chatgpt')
+
+class EDGChatGPT(EDGBaseModel):
+ """
+ A class to interface with the ChatGPT model for text generation.
+
+ This class extends the `EDGBaseModel` abstract base class to implement a specific interface
+ for interacting with the ChatGPT API. It supports text generation using system-level and
+ user-level prompts with customizable parameters such as temperature, sampling strategies,
+ and token limits. The class also includes methods to handle HTTP requests and manage errors.
+
+ Attributes
+ ----------
+ base_url : str
+ The base URL for the ChatGPT API endpoint.
+ api_key : str
+ The API key for authenticating with the ChatGPT API.
+ model_name : str
+ The specific model to use, defaulting to "gpt-4o-mini".
+
+ Methods
+ -------
+ execute_request(system_prompt: str, user_prompt: str, do_sample: bool, temperature: float, top_p: float, max_tokens: int) -> str:
+ Generates text using the ChatGPT model based on the provided prompts and parameters.
+
+ send_request(request: Dict[str, Any]) -> Dict[str, Any]:
+ Sends an HTTP POST request to the ChatGPT API and returns the response as a dictionary.
+
+ Notes
+ -----
+ - The `base_url` and `api_key` are required for proper communication with the ChatGPT API.
+ - Provides detailed error handling for HTTP, connection, timeout, and JSON decoding issues.
+ - Supports customizable text generation parameters for flexibility in model behavior.
+ """
+
+    def __init__(self, base_url: str = None, api_key: str = None, model_name: str = "gpt-4o-mini"):
+ """
+ Initialize the ChatGPT model interface.
+
+ Parameters
+ ----------
+ base_url : str, optional
+ The base URL for the ChatGPT API. Default is None.
+ api_key : str, optional
+ The API key for authenticating with the ChatGPT API. Default is None.
+ model_name : str, optional
+ The specific model to use, defaulting to "gpt-4o-mini".
+ """
+
+ super().__init__(api_key, base_url, model_name=model_name)
+
+ def execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 4095
+ ) -> str:
+
+ """
+ Generate text using the ChatGPT model based on the provided prompts and parameters.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ response = self._execute_request(system_prompt, user_prompt, self.model_name, do_sample, temperature, top_p, max_tokens)
+ return response
+
+ def send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+
+ """
+ Send an HTTP request to the ChatGPT API.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+ dict
+ The response from the API in the form of a dictionary.
+
+ Raises
+ ------
+ HttpClientError
+ If any error occurs during the HTTP request process.
+ """
+
+ response = self._send_request(request=request)
+ return response
+
+ def _send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+
+ """
+ Internal method to send an HTTP POST request to the ChatGPT API.
+
+ This method handles the actual HTTP POST request and manages error handling
+ for issues like connection failures, timeouts, and JSON decoding errors.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+        str or dict
+            The generated text content extracted from the API response, or an
+            error dictionary if the request fails.
+
+ Raises
+ ------
+ HttpClientError
+ If an error occurs during the HTTP request.
+ """
+
+ url = request.get("url", "https://api.openai.com/v1/chat/completions")
+ headers = {**request.get("headers", {})}
+ json = request.get("json", {})
+ try:
+ response = requests.post(
+ url=url,
+ headers=headers,
+ json=json,
+ timeout=30,
+ )
+
+ response.raise_for_status()
+
+ return response.json()["choices"][0]["message"]["content"].strip()
+
+ except requests.exceptions.HTTPError as e:
+ # Handle HTTP error exceptions
+ status_code = e.response.status_code
+ logger.error(
+ "HTTP error occurred. Status Code: %s, URL: %s, Message: %s",
+ status_code,
+ url,
+ e,
+ )
+
+ return {"error": "HTTP error", "status_code": status_code, "message": str(e)}
+
+
+ except requests.exceptions.ConnectionError as e:
+ # Handle connection errors
+ logger.error("Connection error occurred while connecting to %s: %s", url, e)
+
+ return {"error": "Connection error", "message": str(e)}
+
+ except requests.exceptions.Timeout as e:
+ # Handle timeout errors
+ logger.error("Timeout occurred while sending request to %s: %s", url, e)
+
+ return {"error": "Timeout", "message": str(e)}
+
+
+ except requests.exceptions.RequestException as e:
+ # Handle any generic request exceptions
+ logger.error(
+ "Request exception occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Request exception", "message": str(e)}
+
+
+ except ValueError as e:
+ # Handle JSON decoding errors
+ logger.error("JSON decoding error occurred: %s", e)
+
+ return {"error": "JSON decoding error", "message": str(e)}
+
+ except Exception as e:
+ # Catch any unexpected errors
+ logger.critical(
+ "An unexpected error occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Unexpected error", "message": str(e)}
+
+
+ def _execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , model: str = "gpt-4o-mini"
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 4095
+ ) -> str:
+
+ """
+ Internal method to prepare and execute the API request for text generation.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ model : str, optional
+ The specific model to use for text generation. Default is "gpt-4o-mini".
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ if (system_prompt is None and user_prompt is None):
+ logger.error("prompt不能同时为空")
+ raise InvalidPromptError("prompt不能同时为空")
+
+ request_data = {
+ "url": f"{self.base_url}",
+ "headers": {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ },
+ "json": {
+ "model": model,
+ "messages": [
+ {
+ "role": "developer",
+ "content": system_prompt,
+ },
+ {
+ "role": "user",
+ "content": user_prompt,
+ }
+ ],
+ "temperature": temperature,
+ "top_p": top_p,
+ "max_tokens": max_tokens
+ },
+ }
+
+ response = self.send_request(request_data)
+ return response
diff --git a/build/lib/edg4llm/models/deepseek.py b/build/lib/edg4llm/models/deepseek.py
new file mode 100644
index 0000000..edd6990
--- /dev/null
+++ b/build/lib/edg4llm/models/deepseek.py
@@ -0,0 +1,294 @@
+import os
+import json
+import requests
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union, cast
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.models.baseModel import EDGBaseModel
+from edg4llm.utils.exceptions import HttpClientError, InvalidPromptError
+
+logger = custom_logger('deepseek')
+
+class EDGDeepSeek(EDGBaseModel):
+ """
+ A class to interface with the DeepSeek model for text generation.
+
+ This class extends the `EDGBaseModel` abstract base class to implement a specific interface
+ for interacting with the DeepSeek API. It allows generating text based on system-level and
+ user-level prompts, with customizable parameters such as temperature, sampling strategies,
+ and token limits. The class includes methods to handle HTTP requests and manage errors
+ specific to the DeepSeek API.
+
+ Attributes
+ ----------
+ base_url : str
+ The base URL for the DeepSeek API endpoint.
+ api_key : str
+ The API key for authenticating with the DeepSeek API.
+ model_name : str
+ The specific model to use, defaulting to "deepseek-chat".
+
+ Methods
+ -------
+ execute_request(system_prompt: str, user_prompt: str, do_sample: bool, temperature: float, top_p: float, max_tokens: int) -> str:
+ Generates text using the DeepSeek model based on the provided prompts and parameters.
+
+ send_request(request: Dict[str, Any]) -> Dict[str, Any]:
+ Sends an HTTP POST request to the DeepSeek API and returns the response as a dictionary.
+
+ Notes
+ -----
+ - The `base_url` and `api_key` are required for proper communication with the DeepSeek API.
+ - Provides detailed error handling for HTTP, connection, timeout, and JSON decoding issues.
+ - Supports customizable text generation parameters for flexibility in model behavior.
+ """
+
+    def __init__(self, base_url: str = None, api_key: str = None, model_name: str = "deepseek-chat"):
+ """
+ Initialize the DeepSeek model interface.
+
+ Parameters
+ ----------
+ base_url : str, optional
+ The base URL for the DeepSeek API. Default is None.
+ api_key : str, optional
+ The API key for authenticating with the DeepSeek API. Default is None.
+ model_name : str, optional
+ The specific model to use, defaulting to "deepseek-chat".
+ """
+
+        super().__init__(api_key=api_key, base_url=base_url, model_name=model_name)
+
+ def execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 4095
+ ) -> str:
+ """
+ Generate text using the DeepSeek model based on the provided prompts and parameters.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ response = self._execute_request(system_prompt, user_prompt, self.model_name, do_sample, temperature, top_p, max_tokens)
+ return response
+
+ def send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Send an HTTP request to the DeepSeek API.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+ dict
+ The response from the API in the form of a dictionary.
+
+ Raises
+ ------
+ HttpClientError
+ If any error occurs during the HTTP request process.
+ """
+
+ response = self._send_request(request=request)
+ return response
+
+ def _send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Internal method to send an HTTP POST request to the DeepSeek API.
+
+ This method handles the actual HTTP POST request and manages error handling
+ for issues like connection failures, timeouts, and JSON decoding errors.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+        str or dict
+            The generated text content extracted from the API response, or an
+            error dictionary if the request fails.
+
+ Raises
+ ------
+ HttpClientError
+ If an error occurs during the HTTP request.
+ """
+
+ url = request.get("url", "https://api.deepseek.com/chat/completions")
+ headers = {**request.get("headers", {})}
+ data = request.get("data", {})
+
+ if isinstance(data, dict):
+ data = json.dumps(data)
+
+ try:
+ response = requests.request(
+ "POST",
+ url=url,
+ headers=headers,
+ data=data,
+ # timeout=30,
+ )
+
+ response.raise_for_status()
+ return response.json()["choices"][0]["message"]["content"].strip()
+
+ except requests.exceptions.HTTPError as e:
+ # Handle HTTP error exceptions
+ status_code = e.response.status_code
+ logger.error(
+ "HTTP error occurred. Status Code: %s, URL: %s, Message: %s",
+ status_code,
+ url,
+ e,
+ )
+
+ return {"error": "HTTP error", "status_code": status_code, "message": str(e)}
+
+
+ except requests.exceptions.ConnectionError as e:
+ # Handle connection errors
+ logger.error("Connection error occurred while connecting to %s: %s", url, e)
+
+ return {"error": "Connection error", "message": str(e)}
+
+ except requests.exceptions.Timeout as e:
+ # Handle timeout errors
+ logger.error("Timeout occurred while sending request to %s: %s", url, e)
+
+ return {"error": "Timeout", "message": str(e)}
+
+
+ except requests.exceptions.RequestException as e:
+ # Handle any generic request exceptions
+ logger.error(
+ "Request exception occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Request exception", "message": str(e)}
+
+
+ except ValueError as e:
+ # Handle JSON decoding errors
+ logger.error("JSON decoding error occurred: %s", e)
+
+ return {"error": "JSON decoding error", "message": str(e)}
+
+ except Exception as e:
+ # Catch any unexpected errors
+ logger.critical(
+ "An unexpected error occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Unexpected error", "message": str(e)}
+
+ def _execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , model: str = "deepseek-chat"
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 2047
+ ) -> str:
+
+ """
+ Internal method to prepare and execute the API request for text generation.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ model : str, optional
+ The specific model to use for text generation. Default is "deepseek-chat".
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 2047.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ if (system_prompt is None and user_prompt is None):
+ logger.error("prompt不能同时为空")
+ raise InvalidPromptError("prompt不能同时为空")
+
+ request_data = {
+ "url": self.base_url,
+ "data": {
+ "messages": [
+ {"content": system_prompt, "role": "system"},
+ {"content": user_prompt, "role": "user"}
+ ],
+ "model": model,
+ "frequency_penalty": 0,
+ "max_tokens": max_tokens,
+ "presence_penalty": 0,
+ "response_format": {"type": "text"},
+ "stop": None,
+ "stream": False,
+ "stream_options": None,
+ "temperature": temperature,
+ "top_p": top_p,
+ "tools": None,
+ "tool_choice": "none",
+ "logprobs": False,
+ "top_logprobs": None
+ },
+ "headers": {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ 'Authorization': f'Bearer {self.api_key}'
+ }
+ }
+
+ response = self._send_request(request_data)
+
+ return response
diff --git a/build/lib/edg4llm/models/internlm.py b/build/lib/edg4llm/models/internlm.py
new file mode 100644
index 0000000..6f51807
--- /dev/null
+++ b/build/lib/edg4llm/models/internlm.py
@@ -0,0 +1,281 @@
+import os
+import requests
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union, cast
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.models.baseModel import EDGBaseModel
+from edg4llm.utils.exceptions import HttpClientError, InvalidPromptError
+
+logger = custom_logger('internlm')
+
+class EDGInternLM(EDGBaseModel):
+ """
+ A class to interface with the InternLM model for text generation.
+
+ This class extends the `EDGBaseModel` abstract base class to implement a specific interface
+ for interacting with the InternLM API. It allows generating text based on system-level and
+ user-level prompts, with customizable parameters such as temperature, sampling strategies,
+ and token limits. The class includes methods to handle HTTP requests and manage errors
+ specific to the InternLM API.
+
+ Attributes
+ ----------
+ base_url : str
+ The base URL for the InternLM API endpoint.
+ api_key : str
+ The API key for authenticating with the InternLM API.
+ model_name : str
+ The specific model to use, defaulting to "internlm2.5-latest".
+
+ Methods
+ -------
+ execute_request(system_prompt: str, user_prompt: str, model: str, do_sample: bool, temperature: float, top_p: float, max_tokens: int) -> str:
+ Generates text using the InternLM model based on the provided prompts and parameters.
+
+ send_request(request: Dict[str, Any]) -> Dict[str, Any]:
+ Sends an HTTP POST request to the InternLM API and returns the response as a dictionary.
+
+ Notes
+ -----
+ - The `base_url` and `api_key` are required for proper communication with the InternLM API.
+ - Provides detailed error handling for HTTP, connection, timeout, and JSON decoding issues.
+ - Supports customizable text generation parameters for flexibility in model behavior.
+ """
+
+    def __init__(self, base_url: str = None, api_key: str = None, model_name: str = "internlm2.5-latest"):
+ """
+ Initialize the InternLM model interface.
+
+ Parameters
+ ----------
+ base_url : str, optional
+ The base URL for the InternLM API. Default is None.
+ api_key : str, optional
+ The API key for authenticating with the InternLM API. Default is None.
+ model_name : str, optional
+ The specific model to use, defaulting to "internlm2.5-latest".
+ """
+ super().__init__(api_key, base_url, model_name=model_name)
+
+ def execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , model: str = "internlm2.5-latest"
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 4095
+ ) -> str:
+ """
+ Generate text using the InternLM model based on the provided prompts and parameters.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ model : str, optional
+ The specific model to use for text generation, defaulting to "internlm2.5-latest".
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ response = self._execute_request(system_prompt, user_prompt, model, do_sample, temperature, top_p, max_tokens)
+ return response
+
+ def send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Send an HTTP request to the InternLM API.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+ dict
+ The response from the API in the form of a dictionary.
+
+ Raises
+ ------
+ HttpClientError
+ If any error occurs during the HTTP request process.
+ """
+
+ response = self._send_request(request=request)
+ return response
+
+ def _send_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Internal method to send an HTTP POST request to the InternLM API.
+
+ This method handles the actual HTTP POST request and manages error handling
+ for issues like connection failures, timeouts, and JSON decoding errors.
+
+ Parameters
+ ----------
+ request : dict
+ A dictionary containing the request data, including the URL, headers, and JSON body.
+
+ Returns
+ -------
+        str or dict
+            The generated text content extracted from the API response, or an
+            error dictionary if the request fails.
+
+ Raises
+ ------
+ HttpClientError
+ If an error occurs during the HTTP request.
+ """
+
+ url = request.get("url", "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions")
+ headers = {**request.get("headers", {})}
+ json = request.get("json", {})
+ try:
+ response = requests.post(
+ url=url,
+ headers=headers,
+ json=json,
+ timeout=30,
+ )
+
+ response.raise_for_status()
+ return response.json()["choices"][0]["message"]["content"].strip()
+
+ except requests.exceptions.HTTPError as e:
+ # Handle HTTP error exceptions
+ status_code = e.response.status_code
+ logger.error(
+ "HTTP error occurred. Status Code: %s, URL: %s, Message: %s",
+ status_code,
+ url,
+ e,
+ )
+
+ return {"error": "HTTP error", "status_code": status_code, "message": str(e)}
+
+
+ except requests.exceptions.ConnectionError as e:
+ # Handle connection errors
+ logger.error("Connection error occurred while connecting to %s: %s", url, e)
+
+ return {"error": "Connection error", "message": str(e)}
+
+ except requests.exceptions.Timeout as e:
+ # Handle timeout errors
+ logger.error("Timeout occurred while sending request to %s: %s", url, e)
+
+ return {"error": "Timeout", "message": str(e)}
+
+ except requests.exceptions.RequestException as e:
+ # Handle any generic request exceptions
+ logger.error(
+ "Request exception occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Request exception", "message": str(e)}
+
+ except ValueError as e:
+ # Handle JSON decoding errors
+ logger.error("JSON decoding error occurred: %s", e)
+
+ return {"error": "JSON decoding error", "message": str(e)}
+
+ except Exception as e:
+ # Catch any unexpected errors
+ logger.critical(
+ "An unexpected error occurred while sending request to %s: %s", url, e
+ )
+
+ return {"error": "Unexpected error", "message": str(e)}
+
+ def _execute_request(
+ self
+ , system_prompt: str = None
+ , user_prompt: str = None
+ , model: str = "glm-4-flash"
+ , do_sample: bool = True
+ , temperature: float = 0.95
+ , top_p: float = 0.7
+ , max_tokens: int = 4095
+ ) -> str:
+ """
+ Internal method to prepare and execute the API request for text generation.
+
+ Parameters
+ ----------
+ system_prompt : str, optional
+ The system-level prompt providing context for the text generation. Default is None.
+ user_prompt : str, optional
+ The user-provided prompt initiating the text generation. Default is None.
+ model : str, optional
+ The specific model to use for text generation. Default is "internlm2.5-latest".
+ do_sample : bool, optional
+ Whether to use sampling during text generation. Default is True.
+ temperature : float, optional
+ Sampling temperature to control randomness. Default is 0.95.
+ top_p : float, optional
+ Nucleus sampling parameter to control randomness. Default is 0.7.
+ max_tokens : int, optional
+ The maximum number of tokens to generate. Default is 4095.
+
+ Returns
+ -------
+ str
+ The generated text content from the model.
+
+ Raises
+ ------
+ InvalidPromptError
+ If both system and user prompts are None.
+ """
+
+ if (system_prompt is None and user_prompt is None):
+ logger.error("prompt不能同时为空")
+ raise InvalidPromptError("prompt不能同时为空")
+
+ request_data = {
+ "url": f"{self.base_url}",
+ "headers": {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ },
+ "json": {
+ "model": model,
+ "messages": [
+ {
+ "role": "system",
+ "content": system_prompt,
+ },
+ {
+ "role": "user",
+ "content": user_prompt,
+ }
+ ],
+ "temperature": temperature,
+ "top_p": top_p,
+ "max_tokens": max_tokens
+ },
+ }
+
+ response = self.send_request(request_data)
+ return response
diff --git a/build/lib/edg4llm/processor/__init__.py b/build/lib/edg4llm/processor/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/edg4llm/processor/postprocess.py b/build/lib/edg4llm/processor/postprocess.py
new file mode 100644
index 0000000..fd18904
--- /dev/null
+++ b/build/lib/edg4llm/processor/postprocess.py
@@ -0,0 +1,231 @@
+import json
+from typing import Dict, List, Any
+
+from edg4llm.utils.logger import custom_logger
+
+logger = custom_logger("PostProcessor")
+
+class PostProcessor:
+ """
+ A class for post-processing conversation and question data.
+
+ This class provides methods to clean and structure raw data obtained from API responses or external sources.
+ It handles the removal of unnecessary markdown formatting, parses the data into valid JSON format, and
+ structures it for further use in applications such as chatbots or AI assistants. It can also incorporate
+ an optional system prompt into the processed data for context.
+
+ Methods
+ -------
+    dialogue_postprocessing(conversation_data: str, system_prompt: str = None):
+ Processes raw conversation data by cleaning, parsing, and adding an optional system prompt.
+
+ question_postprocessing(question_data: str = None):
+ Processes raw question data by cleaning and structuring it into a list of questions.
+
+ answer_postprocessing(question: str, answer: str, system_prompt: str = None):
+ Processes raw answer data by cleaning, parsing, and structuring it along with the question
+ and an optional system prompt.
+ """
+
+ def __init__(self):
+ pass
+
+    def dialogue_postprocessing(self, conversation_data: str, system_prompt: str = None):
+ """
+ Post-process conversation data.
+
+ This function processes raw conversation data by removing unnecessary formatting and parsing it
+ into a valid JSON format. If a system-level prompt (system_prompt) is provided, it will be added
+ as an "instruction" field to the first conversation entry. The processed data is returned as a
+ dictionary with a "conversation" key.
+
+ Parameters
+ ----------
+ conversation_data : str
+ The raw conversation data in string format, typically from an API response or an external source.
+ It may contain markdown-style formatting such as "```json" or "```" that needs to be removed.
+
+ system_prompt : str, optional
+ An optional system-level prompt that will be added to the "instruction" field of the first
+ conversation entry. If not provided, an empty string will be used. Default is None.
+
+ Returns
+ -------
+ dict or None
+ Returns a dictionary containing the processed conversation data structured under the "conversation" key.
+ Each item in the list corresponds to a conversation entry. If an error occurs during JSON parsing,
+ the function logs the error and returns None.
+
+ Examples
+ --------
+ >>> conversation_data = '''
+ [
+ {"input": "AAA", "output": "BBBB"},
+ {"input": "CCC", "output": "DDDD"}
+ ]
+ '''
+ >>> system_prompt = "You are a helpful assistant."
+        >>> processed_data = dialogue_postprocessing(conversation_data, system_prompt)
+
+ >>> # Output:
+ >>> {
+ "conversation": [
+ {"input": "AAA", "output": "BBBB", "instruction": "You are a helpful assistant."},
+ {"input": "CCC", "output": "DDDD"}
+ ]
+ }
+
+ Notes
+ -----
+ - The function removes any markdown formatting (like "```json" or "```") before parsing the data.
+ - If JSON parsing fails, an error is logged, and the function returns None.
+ """
+ try:
+ # Clean and parse the JSON conversation data
+ conversation_data = json.loads(conversation_data.replace("```json", "").replace("```", ""))
+ except Exception as exception:
+ logger.error("Error parsing JSON: %s", str(exception))
+ return None
+
+ # Initialize the result dictionary with a "conversation" key
+ result = {"conversation": []}
+
+ # Add the system prompt as an instruction to the first conversation entry if provided
+ for idx, data in enumerate(conversation_data):
+ if idx == 0:
+ data["instruction"] = system_prompt if system_prompt is not None else ""
+ result["conversation"].append(data)
+
+ return result
+
+
+ def question_postprocessing(self, question_data: str = None):
+ """
+ Post-process the question data.
+
+ This function processes raw question data by removing unnecessary formatting and ensuring
+ it is in a valid JSON format. It converts each question into a structured dictionary with
+ the key "question" holding the processed content.
+
+ Parameters
+ ----------
+ question_data : str
+ The raw question data in string format, typically from an API response or external source.
+ The string may contain markdown-style formatting such as "```json" or "```" that should be removed.
+
+ Returns
+ -------
+        list or None
+            Returns a list of dictionaries, each of the form {"question": "..."}.
+            If an error occurs during JSON parsing, it returns None.
+
+ Examples
+ --------
+ >>> question_data = "What is your name?"
+ >>> processed_data = question_postprocessing(question_data)
+ >>> print(processed_data)
+ Output: {'question': 'What is your name?'}
+
+ Notes
+ -----
+ - This function removes any markdown formatting (e.g., "```json" or "```") from the input string.
+ - If an exception occurs during JSON parsing, an error message is logged, and the function returns None.
+ """
+
+ try:
+ # Clean up and parse the JSON question data
+ question_data = json.loads(question_data.replace("```json", "").replace("```", ""))
+ except Exception as exception:
+ logger.error("Error parsing JSON: %s", str(exception))
+ return None
+
+ # Initialize the result with a "question" key
+ result = []
+
+ # Extract the question and assign it to the result
+ for _, data in enumerate(question_data):
+ result.append(data)
+
+ return result
+
+ def answer_postprocessing(self, question: str, answer: str, system_prompt: str = None):
+ """
+        Post-process answer data into conversation entries.
+
+        This function parses raw answer data into valid JSON and pairs each parsed answer with
+        the original question, adding an optional system prompt to each conversation entry
+        under the "instruction" key. The processed data is returned as a dictionary.
+
+ Parameters
+ ----------
+ question : str
+ The input question or query from the user.
+
+ answer : str
+ The raw answer data in string format, typically containing JSON content.
+ This string may contain markdown formatting (e.g., "```json" or "```") that needs to be removed.
+
+ system_prompt : str, optional
+ An optional system-level prompt to provide context or instructions. This will be added to
+ each conversation entry under the "instruction" key. Default is None.
+
+ Returns
+ -------
+        dict or None
+            Returns a dictionary with the processed conversation data.
+            The dictionary has a "conversation" key, which is a list of conversation entries.
+            Each entry contains "instruction", "input", and "output" keys.
+            If an error occurs during JSON parsing, the function logs the error and returns None.
+
+ Examples
+ --------
+ >>> # Input:
+ >>> question = "What is AI?"
+ >>> answer = '''
+ [
+ {
+ "input": question,
+ "output": "BBB"
+ }
+ ]
+ '''
+ >>> system_prompt = "You are a helpful assistant."
+
+ >>> # Function Call:
+ >>> processed_data = answer_postprocessing(question, answer, system_prompt)
+
+ >>> # Output:
+        >>> {
+                "conversation": [
+                    {
+                        "instruction": "You are a helpful assistant.",
+                        "input": "What is AI?",
+                        "output": "BBB"
+                    }
+                ]
+            }
+
+ Notes
+ -----
+ - The function removes any markdown formatting (like "```json" or "```") before parsing the data.
+ - If JSON parsing fails, the function logs an error and returns None.
+        - The output is a dictionary with a single "conversation" key, which holds the list of entries.
+ """
+
+ try:
+ # Clean up and parse the JSON conversation data
+ conversation_data = json.loads(answer.replace("```json","").replace("```",""))
+ except Exception as exception:
+ logger.error("Error parsing JSON: %s", str(exception))
+ return None
+
+        # Initialize the result with a "conversation" key
+        result = {"conversation": []}
+
+        # Build one entry per parsed answer, pairing it with the original
+        # question and the optional system prompt
+        for data in conversation_data:
+            conversation = {
+                "instruction": system_prompt,
+                "input": question,
+                "output": data["answer"],
+            }
+            result["conversation"].append(conversation)
+        return result
diff --git a/build/lib/edg4llm/processor/preprocess.py b/build/lib/edg4llm/processor/preprocess.py
new file mode 100644
index 0000000..a8ebe1e
--- /dev/null
+++ b/build/lib/edg4llm/processor/preprocess.py
@@ -0,0 +1,139 @@
+import re
+import sys
+import json
+
+from edg4llm.utils.logger import custom_logger
+from edg4llm.utils.data_utils import is_question_template_consistent
+from edg4llm.utils.data_utils import is_answer_template_consistent
+from edg4llm.utils.data_utils import is_dialogue_template_consistent
+
+from edg4llm.utils.template import Template
+
+logger = custom_logger("preprocess")
+
+class PreProcessor:
+ """
+ A class for pre-processing user prompts before data generation.
+
+ This class provides methods to validate and repair user prompts in different modes such as question,
+ answer, and dialogue. If a user prompt does not match the expected template, the methods automatically
+ append the corresponding format guidelines to ensure consistency.
+
+ Methods
+ -------
+ question_preprocess(user_prompt: str) -> str:
+ Validates and repairs user prompts in question mode.
+
+ answer_preprocess(user_prompt: str) -> str:
+ Validates and repairs user prompts in answer mode.
+
+ dialogue_preprocess(user_prompt: str) -> str:
+ Validates and repairs user prompts in Q&A (dialogue) mode.
+ """
+ def __init__(self):
+ pass
+
+ def question_preprocess(self, language: str, user_prompt: str) -> str:
+ """
+ Validates and processes user prompts in question mode.
+
+ Parameters
+ ----------
+ language : str
+ The language of data in data generation. Must be one of 'zh', 'en'.
+
+ user_prompt : str
+ The user's input prompt to be processed in question mode.
+
+ Returns
+ -------
+ str
+ The validated and, if necessary, repaired user prompt.
+
+ Notes
+ -----
+ - If the user prompt matches the question template, it is returned unchanged.
+ - If the user prompt does not match, format guidelines from `Template.question_template`
+ are appended to the prompt.
+ """
+
+ if is_question_template_consistent(user_prompt=user_prompt):
+ logger.info("User prompt matches the question template. Proceeding with data generation.")
+ return user_prompt
+ else:
+ logger.warning("User prompt does not match the question template. Automatically added format guidelines.")
+ if language == "zh":
+ repaired_user_prompt = user_prompt + '\n' + Template.question_zh_template
+ else:
+ repaired_user_prompt = user_prompt + '\n' + Template.question_en_template
+ return repaired_user_prompt
+
+ def answer_preprocess(self, language: str, user_prompt: str) -> str:
+ """
+ Validates and processes user prompts in answer mode.
+
+ Parameters
+ ----------
+ language : str
+ The language of data in data generation. Must be one of 'zh', 'en'.
+
+ user_prompt : str
+ The user's input prompt to be processed in answer mode.
+
+ Returns
+ -------
+ str
+ The validated and, if necessary, repaired user prompt.
+
+ Notes
+ -----
+ - If the user prompt matches the answer template, it is returned unchanged.
+ - If the user prompt does not match, format guidelines from `Template.answer_template`
+ are appended to the prompt.
+ """
+
+ if is_answer_template_consistent(user_prompt=user_prompt):
+ logger.info("User prompt matches the answer template. Proceeding with data generation.")
+ return user_prompt
+ else:
+ logger.warning("User prompt does not match the answer template. Automatically added format guidelines.")
+ if language == "zh":
+ repaired_user_prompt = user_prompt + '\n' + Template.answer_zh_template
+ else:
+ repaired_user_prompt = user_prompt + '\n' + Template.answer_en_template
+ return repaired_user_prompt
+
+ def dialogue_preprocess(self, language: str, user_prompt: str) -> str:
+ """
+ Validates and processes user prompts in Q&A (dialogue) mode.
+
+ Parameters
+ ----------
+ language : str
+ The language of data in data generation. Must be one of 'zh', 'en'.
+
+ user_prompt : str
+ The user's input prompt to be processed in Q&A mode.
+
+ Returns
+ -------
+ str
+ The validated and, if necessary, repaired user prompt.
+
+ Notes
+ -----
+ - If the user prompt matches the dialogue template, it is returned unchanged.
+ - If the user prompt does not match, format guidelines from `Template.dialogue_template`
+ are appended to the prompt.
+ """
+
+ if is_dialogue_template_consistent(user_prompt=user_prompt):
+ logger.info("User prompt matches the dialogue template. Proceeding with data generation.")
+ return user_prompt
+ else:
+ logger.warning("User prompt does not match the dialogue template. Automatically added format guidelines.")
+ if language == "zh":
+ repaired_user_prompt = user_prompt + '\n' + Template.dialogue_zh_template
+ else:
+ repaired_user_prompt = user_prompt + '\n' + Template.dialogue_en_template
+ return repaired_user_prompt
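+
+
+# Illustrative usage sketch (not part of the library API; the prompt text is an
+# assumed example): a user prompt without the expected JSON template is repaired
+# by appending the matching language-specific template from `Template`.
+#
+#   pre = PreProcessor()
+#   repaired = pre.question_preprocess("en", "Generate questions about Python.")
+#   # `repaired` now ends with Template.question_en_template, so the model is
+#   # instructed to answer strictly in the [{"question": "AAA"}] JSON format.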
diff --git a/build/lib/edg4llm/utils/__init__.py b/build/lib/edg4llm/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/build/lib/edg4llm/utils/config.py b/build/lib/edg4llm/utils/config.py
new file mode 100644
index 0000000..a4534eb
--- /dev/null
+++ b/build/lib/edg4llm/utils/config.py
@@ -0,0 +1,8 @@
+import dataclasses
+
+@dataclasses.dataclass
+class DefaultConfig:
+ """
+ A placeholder class for default configuration settings.
+ """
+ pass
diff --git a/build/lib/edg4llm/utils/data_utils.py b/build/lib/edg4llm/utils/data_utils.py
new file mode 100644
index 0000000..d928539
--- /dev/null
+++ b/build/lib/edg4llm/utils/data_utils.py
@@ -0,0 +1,157 @@
+import json
+import re
+from typing import Dict, List, Any
+
+def is_question_template_consistent(user_prompt: str) -> bool:
+ """
+ Check if the user prompt contains a consistent question JSON template.
+
+ Parameters
+ ----------
+ user_prompt : str
+ The user-provided prompt to be validated.
+
+ Returns
+ -------
+ bool
+ True if the user prompt contains a valid and consistent question JSON template,
+ False otherwise.
+
+ Notes
+ -----
+ - The function uses a regular expression to extract the JSON template and compares it
+ with the target template.
+ - The target template is:
+ [
+ {
+ "question": "AAA"
+ }
+ ]
+ - Returns False if the JSON extraction or comparison fails.
+ """
+ target_template = [
+ {
+ "question": "AAA"
+ }
+ ]
+
+ # Regular expression to extract JSON template
+ pattern = r"\[\s*{\s*\"question\"\s*:\s*\"AAA\"\s*}\s*\]"
+ match = re.search(pattern, user_prompt)
+
+ if match:
+ try:
+ extracted_template = json.loads(match.group(0))
+ except json.JSONDecodeError:
+ return False
+ return extracted_template == target_template
+ return False
+
+def is_answer_template_consistent(user_prompt: str) -> bool:
+ """
+ Check if the user prompt contains a consistent answer JSON template.
+
+ Parameters
+ ----------
+ user_prompt : str
+ The user-provided prompt to be validated.
+
+ Returns
+ -------
+ bool
+ True if the user prompt contains a valid and consistent answer JSON template,
+ False otherwise.
+
+ Notes
+ -----
+ - The function uses a regular expression to extract the JSON template and compares it
+ with the target template.
+ - The target template is:
+ [
+ {
+ "answer": "AAA"
+ }
+ ]
+ - Returns False if the JSON extraction or comparison fails.
+ """
+ target_template = [
+ {
+ "answer": "AAA"
+ }
+ ]
+
+ # Regular expression to extract JSON template
+ pattern = r"\[\s*{\s*\"answer\"\s*:\s*\"AAA\"\s*}\s*\]"
+ match = re.search(pattern, user_prompt)
+
+ if match:
+ try:
+ extracted_template = json.loads(match.group(0))
+ except json.JSONDecodeError:
+ return False
+ return extracted_template == target_template
+ return False
+
+def is_dialogue_template_consistent(user_prompt: str) -> bool:
+ """
+ Check if the user prompt contains a consistent dialogue JSON template.
+
+ Parameters
+ ----------
+ user_prompt : str
+ The user-provided prompt to be validated.
+
+ Returns
+ -------
+ bool
+ True if the user prompt contains a valid and consistent dialogue JSON template,
+ False otherwise.
+
+ Notes
+ -----
+ - The function uses a regular expression to check for the dialogue JSON structure.
+ - The expected template format is:
+ [
+ {
+ "input": "AAA",
+ "output": "BBB"
+ }
+ ]
+ """
+
+ pattern = r"\[\s*\{\{\s*\"input\"\s*:\s*\"AAA\"\s*,\s*\"output\"\s*:\s*\"BBB\"\s*\}\}\s*\]"
+ match = re.search(pattern, user_prompt)
+ return match is not None
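+
+# Illustrative check (the prompt text is an assumed example): a prompt that
+# embeds the expected dialogue JSON template passes the consistency check, so
+# PreProcessor leaves it unchanged.
+#
+#   prompt = 'Generate dialogues. Return [ {{ "input":"AAA","output":"BBB" }} ]'
+#   is_dialogue_template_consistent(prompt)   # -> True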
+
+def save_data_to_json(data: List[Dict], output_path: str):
+ """
+ Save a list of dictionaries to a JSON file.
+
+ Parameters
+ ----------
+ data : list of dict
+ A list of dictionaries to be saved to a JSON file. Each dictionary should contain
+ the data to be written.
+
+ output_path : str
+ The path (including the filename) where the JSON data will be saved.
+ The file will be written in UTF-8 encoding.
+
+ Returns
+ -------
+ None
+ This function does not return any value. It saves the data to the specified file.
+
+ Examples
+ --------
+ >>> data = [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]
+ >>> save_data_to_json(data, "output.json")
+
+ Notes
+ -----
+ - The function uses `json.dump` to write the data to the file.
+ - Non-ASCII characters are preserved with the `ensure_ascii=False` argument.
+ - The file will be saved with an indentation of 4 spaces to make it human-readable.
+ """
+ with open(output_path, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=4)
diff --git a/build/lib/edg4llm/utils/exceptions.py b/build/lib/edg4llm/utils/exceptions.py
new file mode 100644
index 0000000..515dd57
--- /dev/null
+++ b/build/lib/edg4llm/utils/exceptions.py
@@ -0,0 +1,35 @@
+from typing import Optional
+
+
+class HttpClientError(Exception):
+ """
+ Exception raised for errors encountered in the HTTP client.
+
+ Parameters
+ ----------
+ message : str
+ A detailed error message describing the issue.
+ status_code : Optional[int], optional
+ The HTTP status code associated with the error, by default None.
+
+ Attributes
+ ----------
+ status_code : Optional[int]
+ The HTTP status code associated with the error.
+ """
+
+ def __init__(self, message: str, status_code: Optional[int] = None):
+ super().__init__(message)
+ self.status_code = status_code
+
+
+class InvalidPromptError(Exception):
+ """
+ Custom exception raised when an invalid or empty prompt is encountered.
+
+ Notes
+ -----
+ This exception is intended to handle cases where a required prompt input
+ is missing or invalid.
+ """
+ pass
diff --git a/build/lib/edg4llm/utils/logger.py b/build/lib/edg4llm/utils/logger.py
new file mode 100644
index 0000000..de430c1
--- /dev/null
+++ b/build/lib/edg4llm/utils/logger.py
@@ -0,0 +1,104 @@
+import datetime
+import logging
+
+__all__ = ['custom_logger']
+
+# Define log level colors for terminal output
+LOG_COLORS = {
+ 'DEBUG': '\033[96m', # Cyan
+ 'INFO': '\033[92m', # Green
+ 'WARNING': '\033[93m', # Yellow
+ 'ERROR': '\033[91m', # Red
+ 'CRITICAL': '\033[1;91m', # Bold Red
+ 'RESET': '\033[0m', # Reset color
+}
+
+def custom_logger(name: str):
+ """
+ Creates a custom logger with color-coded log levels and UTC+8 time formatting.
+
+ Parameters
+ ----------
+ name : str
+ The name of the logger, typically the name of the module or application.
+
+ Returns
+ -------
+ logging.Logger
+ A customized logger instance with color-coded levels and UTC+8 timezone support.
+
+ Notes
+ -----
+ - Log levels are color-coded for easier readability in terminal output.
+ - Log messages use UTC+8 timezone formatting.
+ - The logger prevents propagation to root loggers and clears existing handlers.
+ - The logger uses a custom `StreamHandler` with color support.
+ """
+ # Create a logger instance
+ logger = logging.getLogger(name)
+ logger.setLevel(logging.INFO) # Default log level
+ logger.propagate = False # Disable propagation to root loggers
+ logger.handlers = [] # Clear any existing handlers
+
+ # Define a custom log message format
+ formatter = logging.Formatter(
+ '[%(asctime)s]-[%(name)s:%(levelname)s]:%(message)s'
+ )
+
+ # Custom time converter to use UTC+8
+    def _utc8_area(timestamp):
+ """
+ Convert a timestamp to a UTC+8 time tuple.
+
+ Parameters
+ ----------
+ timestamp : float
+ The timestamp to convert.
+
+ Returns
+ -------
+ time.struct_time
+ A time tuple in UTC+8 timezone.
+ """
+ now = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc) + datetime.timedelta(hours=8)
+ return now.timetuple()
+
+ # Set the custom time converter in the formatter
+    formatter.converter = _utc8_area
+
+ # Define a custom StreamHandler with color-coded log levels
+ class ColorStreamHandler(logging.StreamHandler):
+ """
+ A custom logging stream handler that adds color coding to log messages.
+
+ Methods
+ -------
+ emit(record):
+ Formats and outputs a log record with color coding based on log level.
+ """
+ def emit(self, record):
+ """
+ Format and emit a log record with color coding.
+
+ Parameters
+ ----------
+ record : logging.LogRecord
+ The log record to process and output.
+ """
+ try:
+ msg = self.format(record) # Format the log record
+ color = LOG_COLORS.get(record.levelname, LOG_COLORS['RESET']) # Get the color for the log level
+ # Write the log message with color
+ self.stream.write(f"{color}{msg}{LOG_COLORS['RESET']}\n")
+ self.flush() # Flush the stream
+ except Exception:
+ self.handleError(record) # Handle any errors during logging
+
+ # Create and configure the custom handler
+ custom_handler = ColorStreamHandler()
+ custom_handler.setFormatter(formatter)
+
+ # Add the custom handler to the logger
+ logger.addHandler(custom_handler)
+
+ return logger
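+
+
+# Illustrative usage sketch (the logger name is an assumed example): each module
+# creates its own color-coded logger.
+#
+#   log = custom_logger("demo")
+#   log.info("edg4llm logger initialised")   # printed in green with a UTC+8 timestamp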
diff --git a/build/lib/edg4llm/utils/template.py b/build/lib/edg4llm/utils/template.py
new file mode 100644
index 0000000..3115009
--- /dev/null
+++ b/build/lib/edg4llm/utils/template.py
@@ -0,0 +1,113 @@
+from dataclasses import dataclass
+
+@dataclass
+class Template:
+ """
+ A class to define language-specific templates for user prompts, providing a strict JSON format
+ to preprocess user input. If the user's prompt does not include format instructions, the
+ appropriate template will be added to enforce the required structure.
+
+ Attributes:
+ ----------
+ question_zh_template : str
+ A JSON format template for Chinese question prompts. Ensures that generated questions
+ are returned in a JSON format with a "question" field.
+
+ answer_zh_template : str
+ A JSON format template for Chinese answer prompts. Ensures that generated answers
+ are returned in a JSON format with an "answer" field.
+
+ dialogue_zh_template : str
+ A JSON format template for Chinese dialogue prompts. Ensures that the interaction is
+ returned in a JSON format with "input" representing the question and "output" representing
+ the response.
+
+ question_en_template : str
+ A JSON format template for English question prompts. Ensures that generated questions
+ are returned in a JSON format with a "question" field.
+
+ answer_en_template : str
+ A JSON format template for English answer prompts. Ensures that generated answers
+ are returned in a JSON format with an "answer" field.
+
+ dialogue_en_template : str
+ A JSON format template for English dialogue prompts. Ensures that the interaction is
+ returned in a JSON format with "input" representing the question and "output" representing
+ the response.
+
+ Notes:
+ -----
+ This class is designed for preprocessing user prompts. If a user's input does not include
+ specific format instructions, the appropriate template (based on language) is appended to
+ the user prompt to ensure compliance with the required JSON format.
+ """
+
+ question_zh_template = \
+ """
+ 严格遵循规则: 请以如下格式返回生成的数据, 只返回JSON格式,json模板:
+ [
+ {
+ "question":"AAA"
+ }
+ ]
+ 其中question字段表示生成的问题
+ """
+
+ answer_zh_template = \
+ """
+ 严格遵循规则: 请以如下格式返回生成的数据, 只返回JSON格式,json模板:
+ [
+ {
+ "answer":"AAA"
+ }
+ ]
+ 其中answer字段表示生成的答案
+ """
+
+ dialogue_zh_template = \
+ """
+ 严格遵循规则: 请以如下格式返回生成的数据, 只返回JSON格式,json模板:
+ [
+ {{
+ "input":"AAA","output":"BBB"
+ }}
+ ]
+ 其中input字段表示问题, output字段回答
+ """
+
+ question_en_template = \
+ """
+ Strictly follow the rules: Please return the generated data in the following format,
+ only in JSON format. JSON template:
+ [
+ {
+ "question":"AAA"
+ }
+ ]
+ The "question" field represents the generated question.
+ """
+
+ answer_en_template = \
+ """
+ Strictly follow the rules: Please return the generated data in the following format,
+ only in JSON format. JSON template:
+ [
+ {
+ "answer":"AAA"
+ }
+ ]
+ The "answer" field represents the generated answer.
+ """
+
+ dialogue_en_template = \
+ """
+ Strictly follow the rules: Please return the generated data in the following format,
+ only in JSON format. JSON template:
+ [
+ {{
+ "input":"AAA","output":"BBB"
+ }}
+ ]
+ The "input" field represents the question, and the "output" field
+ represents the answer.
+ """
diff --git a/dist/edg4llm-1.0.14-py3-none-any.whl b/dist/edg4llm-1.0.14-py3-none-any.whl
new file mode 100644
index 0000000..88568a1
Binary files /dev/null and b/dist/edg4llm-1.0.14-py3-none-any.whl differ
diff --git a/dist/edg4llm-1.0.14.tar.gz b/dist/edg4llm-1.0.14.tar.gz
new file mode 100644
index 0000000..6138b74
Binary files /dev/null and b/dist/edg4llm-1.0.14.tar.gz differ
diff --git a/edg4llm.egg-info/PKG-INFO b/edg4llm.egg-info/PKG-INFO
new file mode 100644
index 0000000..02809bc
--- /dev/null
+++ b/edg4llm.egg-info/PKG-INFO
@@ -0,0 +1,281 @@
+Metadata-Version: 2.1
+Name: edg4llm
+Version: 1.0.14
+Summary: A unified tool to generate fine-tuning datasets for LLMs, including questions, answers, and dialogues.
+Home-page: https://github.com/alannikos/edg4llm
+Author: Alannikos
+Author-email: alannikos768@outlook.com
+Keywords: LLM fine-tuning data-generation AI NLP
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.32.3
+
+# EDG4LLM
+
+
+
+
+```
+ __ __ __ __ ___ __ __ __ __
+| | |_ | / / \ |\/| |_ | / \ |_ | \ / _ |__| | | |\/|
+|/\| |__ |__ \__ \__/ | | |__ | \__/ |__ |__/ \__) | |__ |__ | |
+
+```
+
+
+
+
+
+
+[📘Documentation](https://github.com/Alannikos/FunGPT) |
+[🛠️Quick Start](https://github.com/Alannikos/FunGPT) |
+[🤔Reporting Issues](https://github.com/Alannikos/FunGPT/issues)
+
+
+
+
+
+
+[![GitHub Issues](https://img.shields.io/github/issues/Alannikos/edg4llm?style=flat&logo=github&color=%23FF5252)](https://github.com/Alannikos/edg4llm/issues)
+[![GitHub forks](https://img.shields.io/github/forks/Alannikos/edg4llm?style=flat&logo=github&color=%23FF9800)](https://github.com/Alannikos/edg4llm/forks)
+![GitHub Repo stars](https://img.shields.io/github/stars/Alannikos/edg4llm?style=flat&logo=github&color=%23FFEB3B)
+![GitHub License](https://img.shields.io/github/license/Alannikos/edg4llm?style=flat&logo=github&color=%234CAF50)
+[![Discord](https://img.shields.io/discord/1327445853388144681?style=flat&logo=discord)](https://discord.com/channels/1327445853388144681/)
+[![Bilibili](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Frelation%2Fstat%3Fvmid%3D3494365446015137&query=%24.data.follower&style=flat&logo=bilibili&label=followers&color=%23FF69B4)](https://space.bilibili.com/3494365446015137)
+[![PyPI - Version](https://img.shields.io/pypi/v/edg4llm?style=flat&logo=pypi&logoColor=blue&color=red)](https://pypi.org/project/edg4llm/)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/edg4llm?color=blue&logo=pypi&logoColor=gold)](https://pypi.org/project/edg4llm/)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/edg4llm?logo=python&logoColor=gold)](https://pypi.org/project/edg4llm/)
+
+
+
+**Easy Data Generation for Large Language Models (abbreviated as EDG4LLM)** is a unified tool to generate fine-tuning datasets for LLMs, including questions, answers, and dialogues.
+
+
+## Latest News
+
+
+2025
+
+- [2025/01/11] 👋👋 We are excited to announce [**the initial release of edg4llm v1.0.12**](https://pypi.org/project/edg4llm/1.0.12/), marking the completion of its core functionalities.
+
+
+
+## Table of Contents
+- [Latest News](#latest-news)
+- [Introduction](#introduction)
+- [Features](#features)
+- [Installation](#installation)
+- [Quick Start](#quick-start)
+- [Requirements](#requirements)
+- [Future Development Plans](#future-development-plans)
+- [Acknowledgments](#acknowledgments)
+- [License](#license)
+- [Contact Me](#contact-me)
+- [Star History](#star-history)
+
+## Introduction
+**edg4llm** is a Python library designed specifically for generating fine-tuning data using large language models. This tool aims to assist users in creating high-quality training datasets efficiently. At its current stage, it mainly supports text data generation. The generated data includes, but is not limited to:
+- **Question data**
+- **Answer data**
+- **Dialogue data**
+
+With **edg4llm**, users can easily produce diverse datasets tailored to fine-tuning requirements, significantly enhancing the performance of large language models in specific tasks.
+
+## Features
+EDG4LLM is a unified tool designed to simplify and accelerate the creation of fine-tuning datasets for large language models. With a focus on usability, efficiency, and adaptability, it offers a range of features to meet diverse development needs while ensuring seamless integration and robust debugging support.
+
+1. **Simple to Use**: Provides a straightforward interface that allows users to get started without complex configurations.
+2. **Lightweight**: Minimal dependencies and low resource consumption make it efficient and easy to use.
+3. **Flexibility**: Supports a variety of data formats and generation options, allowing customization to meet specific needs.
+4. **Compatibility**: Seamlessly integrates with mainstream large language models and is suitable for various development scenarios.
+5. **Transparent Debugging**: Provides clear and detailed log outputs, making it easy to debug and trace issues effectively.
+
+## Installation
+To install **edg4llm**, simply run the following command in your terminal:
+
+
+```bash
+pip install edg4llm
+```
+
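+To confirm that the installation succeeded, you can import the package and print its version. This is a minimal sanity check; the version string you see depends on the release you installed.
+
+```python
+import edg4llm
+
+print(edg4llm.__version__)  # e.g. "1.0.14"
+```
+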
+### Supported Python Versions
+- Python 3.8 or higher is required for compatibility with this library. Ensure your environment meets this version requirement.
+
+### Supported LLM Provider
+The current version of edg4llm supports the following large language model providers:
+- [**InternLM**](https://github.com/InternLM)
+    - Developer: Shanghai Artificial Intelligence Laboratory.
+    - Advantages: InternLM is a series of open-source large language models that offer outstanding reasoning, long-text processing, and tool usage capabilities.
+
+- [**ChatGLM**](https://github.com/THUDM/)
+    - Developer: Tsinghua University and Zhipu AI (joint development).
+    - Advantages: ChatGLM is an open-source, bilingual dialog language model based on the General Language Model (GLM) architecture. It has been trained on a large corpus of Chinese and English text, making it highly effective for generating natural and contextually relevant responses.
+
+- [**DeepSeek**](https://github.com/deepseek-ai/)
+    - Developer: The DeepSeek team.
+    - Advantages: DeepSeek-V3 is a powerful and cost-effective open-source large language model. It offers top-tier performance, especially in tasks like language generation, question answering, and dialog systems.
+
+- [**OpenAI ChatGPT**](https://chatgpt.com/)
+    - Developer: OpenAI.
+    - Advantages: OpenAI's ChatGPT is a highly advanced language model known for its robust text generation capabilities. It has been trained on a vast amount of data, allowing it to generate high-quality and contextually relevant responses.
+
+More providers will be added in future updates to extend compatibility and functionality.
+
+| **Model** | **Free** | **Base URL** |
+|--------------------|------------------|------------------------------------------------------------|
+| **InternLM**       | Yes (Partly)                   | `https://internlm-chat.intern-ai.org.cn/puyu/api/v1/chat/completions` |
+| **ChatGLM**        | Yes (Partly)                   | `https://open.bigmodel.cn/api/paas/v4/chat/completions/` |
+| **DeepSeek**       | Yes (Free Trial for New Users) | `https://api.deepseek.com/chat/completions` |
+| **OpenAI ChatGPT** | No (Paid Plans) | `https://api.openai.com/v1/chat/completions` |
+
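+Switching providers only changes the arguments passed at initialization; the prompts and the `generate` call stay the same. The following is a minimal sketch, assuming you have a valid DeepSeek API key exported as an environment variable; the model name `deepseek-chat` and the variable name `DEEPSEEK_API_KEY` are illustrative placeholders, while the base URL is taken from the table above.
+
+```python
+import os
+
+from edg4llm import EDG4LLM
+
+# Hypothetical sketch: point edg4llm at DeepSeek instead of ChatGLM.
+edg = EDG4LLM(
+    model_provider="deepseek",
+    model_name="deepseek-chat",                            # illustrative model name
+    base_url="https://api.deepseek.com/chat/completions",  # from the table above
+    api_key=os.environ.get("DEEPSEEK_API_KEY", "xxx"),     # assumed environment variable
+)
+```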
+
+## Quick Start
+
+To get started with **edg4llm**, follow the steps below. This example demonstrates how to use the library to generate dialogue data based on a specific prompt.
+
+### Prerequisites
+
+1. Install the **edg4llm** package:
+   ```bash
+   pip install edg4llm
+   ```
+
+2. Ensure you have Python version 3.8 or higher.
+
+3. Obtain the necessary API key and base URL for your chosen model provider (e.g., ChatGLM).
+
+### Code Example (Chinese Version)
+```python
+# chatglm_demo.py
+
+import edg4llm
+print(edg4llm.__version__)
+
+from edg4llm import EDG4LLM
+
+api_key = "xxx"
+base_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
+
+edg = EDG4LLM(model_provider='chatglm', model_name="glm-4-flash", base_url=base_url, api_key=api_key)
+# Set up the test data
+system_prompt = """你是一个精通中国古代诗词的古文学大师"""
+
+user_prompt = """
+ 目标: 1. 请生成过年为场景的连续多轮对话记录
+ 2. 提出的问题要多样化。
+ 3. 要符合人类的说话习惯。
+ 4. 严格遵循规则: 请以如下格式返回生成的数据, 只返回JSON格式,json模板:
+ [
+ {{
+ "input":"AAA","output":"BBB"
+ }}
+ ]
+ 其中input字段表示一个人的话语, output字段表示专家的话语
+"""
+num_samples = 1  # Generate only one dialogue sample
+
+# Call the generate method to generate the dialogue
+data_dialogue = edg.generate(
+ task_type="dialogue",
+ system_prompt=system_prompt,
+ user_prompt=user_prompt,
+ num_samples=num_samples
+)
+```
+### Code Example (English Version)
+```python
+# chatglm_demo.py
+
+import edg4llm
+print(edg4llm.__version__)
+
+from edg4llm import EDG4LLM
+
+api_key = "xxx"
+base_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
+
+edg = EDG4LLM(model_provider='chatglm', model_name="glm-4-flash", base_url=base_url, api_key=api_key)
+
+# Set the test data
+system_prompt = """You are a master of ancient Chinese literature, specializing in classical poetry."""
+
+user_prompt = """
+ Goal: 1. Please generate a multi-turn dialogue set in the context of celebrating the Lunar New Year.
+ 2. The questions should be diverse.
+ 3. The dialogue should align with natural human conversational habits.
+ 4. Strictly follow this rule: Please return the generated data in the following format, only in JSON format. JSON template:
+ [
+ {{
+ "input":"AAA","output":"BBB"
+ }}
+ ]
+ Where the input field represents a person's dialogue, and the output field represents the expert's response.
+"""
+num_samples = 1 # Generate only one dialogue sample
+
+# Call the generate method to generate the dialogue
+data_dialogue = edg.generate(
+ task_type="dialogue",
+ system_prompt=system_prompt,
+ user_prompt=user_prompt,
+ num_samples=num_samples
+)
+
+```
+
+### Explanation
+
+1. Importing the Library: Import the `edg4llm` library and verify the version using `print(edg4llm.__version__)`.
+
+2. Initialization: Use `EDG4LLM` to initialize the library with the appropriate model provider, model name, base URL, and API key.
+
+3. Prompts:
+    - `system_prompt` defines the behavior or role of the assistant.
+    - `user_prompt` provides specific instructions for generating data.
+4. Data Generation: Use the `generate` method with the following parameters:
+    - `task_type`: Defines the type of task (e.g., dialogue, question-answering).
+    - `system_prompt` and `user_prompt`: Provide context and task-specific instructions.
+    - `num_samples`: Specifies how many samples to generate.
+5. Output: The generated data is returned as a JSON object in the specified format (see the sketch below for one way to save it to disk).
+
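+If you want to keep the generated samples for a later fine-tuning run, one straightforward option is to write them to a JSON file. This is a minimal sketch, assuming the object returned by `generate` is JSON-serializable (for example, a list of `{"input": ..., "output": ...}` dictionaries, as requested in the prompt); the file name `dialogue_samples.json` is arbitrary.
+
+```python
+import json
+
+# Persist the generated dialogue data from the Quick Start example above.
+# Assumes `data_dialogue` is JSON-serializable (e.g. a list of dicts).
+with open("dialogue_samples.json", "w", encoding="utf-8") as f:
+    json.dump(data_dialogue, f, ensure_ascii=False, indent=2)
+
+print(f"Wrote {len(data_dialogue)} sample(s) to dialogue_samples.json")
+```
+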
+## Requirements
+This project has **minimal dependencies**, requiring only the `requests` library. Make sure to have the following version installed:
+
+- requests>=2.32.3
+
+## Future Development Plans
+1. - [ ] Record an introduction video
+2. - [ ] Support Gemini2
+3. - [ ] Support local large language models
+4. - [ ] Support other data types, such as images
+
+## Acknowledgments
+| Project | Description |
+|---|---|
+| [FunGPT](https://github.com/Alannikos/FunGPT) | An open-source Role-Play project |
+| [InternLM](https://github.com/InternLM/InternLM) | A series of advanced open-source large language models |
+| [ChatGLM](https://github.com/THUDM/) | A bilingual dialog language model based on the General Language Model (GLM) architecture, jointly developed by Tsinghua University and Zhipu AI. |
+| [DeepSeek](https://github.com/deepseek-ai/) | A powerful and cost-effective open-source large language model, excelling in tasks such as language generation, question answering, and dialog systems. |
+| [ChatGPT](https://openai.com/chatgpt/) | A highly advanced language model developed by OpenAI, known for its robust text generation capabilities. |
+
+## License
+MIT License - See [LICENSE](LICENSE) for details.
+
+## Contact Me
+Thank you for using **EDG4LLM**! Your support and feedback are invaluable in making this project better.
+
+If you encounter any issues, have suggestions, or simply want to share your thoughts, feel free to:
+- Submit an Issue: Visit the [Issues Page](https://github.com/Alannikos/edg4llm/issues) and describe the problem or suggestion.
+- Email Me: You can also reach out directly via email at alannikos768@outlook.com. I'll do my best to respond promptly.
+
+Your contributions and feedback are greatly appreciated. Thank you for helping improve this tool!
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=Alannikos/edg4llm&type=Date)](https://star-history.com/#Alannikos/edg4llm&Date)
diff --git a/edg4llm.egg-info/SOURCES.txt b/edg4llm.egg-info/SOURCES.txt
new file mode 100644
index 0000000..0c7a2ce
--- /dev/null
+++ b/edg4llm.egg-info/SOURCES.txt
@@ -0,0 +1,35 @@
+LICENSE
+README.md
+setup.py
+edg4llm/__init__.py
+edg4llm.egg-info/PKG-INFO
+edg4llm.egg-info/SOURCES.txt
+edg4llm.egg-info/dependency_links.txt
+edg4llm.egg-info/not-zip-safe
+edg4llm.egg-info/requires.txt
+edg4llm.egg-info/top_level.txt
+edg4llm/core/__init__.py
+edg4llm/core/dataGenerators.py
+edg4llm/core/interface.py
+edg4llm/core/pipeline.py
+edg4llm/generators/__init__.py
+edg4llm/generators/text_generators/__init__.py
+edg4llm/generators/text_generators/answer_generator.py
+edg4llm/generators/text_generators/base_generator.py
+edg4llm/generators/text_generators/dialogue_generator.py
+edg4llm/generators/text_generators/question_generator.py
+edg4llm/models/__init__.py
+edg4llm/models/baseModel.py
+edg4llm/models/chatglm.py
+edg4llm/models/chatgpt.py
+edg4llm/models/deepseek.py
+edg4llm/models/internlm.py
+edg4llm/processor/__init__.py
+edg4llm/processor/postprocess.py
+edg4llm/processor/preprocess.py
+edg4llm/utils/__init__.py
+edg4llm/utils/config.py
+edg4llm/utils/data_utils.py
+edg4llm/utils/exceptions.py
+edg4llm/utils/logger.py
+edg4llm/utils/template.py
\ No newline at end of file
diff --git a/edg4llm.egg-info/dependency_links.txt b/edg4llm.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/edg4llm.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/edg4llm.egg-info/not-zip-safe b/edg4llm.egg-info/not-zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/edg4llm.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/edg4llm.egg-info/requires.txt b/edg4llm.egg-info/requires.txt
new file mode 100644
index 0000000..d86a09d
--- /dev/null
+++ b/edg4llm.egg-info/requires.txt
@@ -0,0 +1 @@
+requests>=2.32.3
diff --git a/edg4llm.egg-info/top_level.txt b/edg4llm.egg-info/top_level.txt
new file mode 100644
index 0000000..7080f1d
--- /dev/null
+++ b/edg4llm.egg-info/top_level.txt
@@ -0,0 +1 @@
+edg4llm