diff --git a/pkg/externalfunctions/dataextraction.go b/pkg/externalfunctions/dataextraction.go index 244b362..1102c6e 100644 --- a/pkg/externalfunctions/dataextraction.go +++ b/pkg/externalfunctions/dataextraction.go @@ -22,7 +22,7 @@ import ( "github.com/tmc/langchaingo/textsplitter" ) -// DataExtractionGetGithubFilesToExtract gets all files from github that need to be extracted. +// GetGithubFilesToExtract gets all files from github that need to be extracted. // // Tags: // - @displayName: List Github Files @@ -38,7 +38,7 @@ import ( // // Returns: // - githubFilesToExtract: github files to extract. -func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwner string, +func GetGithubFilesToExtract(githubRepoName string, githubRepoOwner string, githubRepoBranch string, githubAccessToken string, githubFileExtensions []string, githubFilteredDirectories []string, githubExcludedDirectories []string) (githubFilesToExtract []string) { @@ -74,7 +74,7 @@ func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwne return githubFilesToExtract } -// DataExtractionGetLocalFilesToExtract gets all files from local that need to be extracted. +// GetLocalFilesToExtract gets all files from local that need to be extracted. // // Tags: // - @displayName: List Local Files @@ -87,7 +87,7 @@ func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwne // // Returns: // - localFilesToExtract: local files to extract. -func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions []string, +func GetLocalFilesToExtract(localPath string, localFileExtensions []string, localFilteredDirectories []string, localExcludedDirectories []string) (localFilesToExtract []string) { // Check if the local path exists. if _, err := os.Stat(localPath); os.IsNotExist(err) { @@ -120,7 +120,7 @@ func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions return *localFiles } -// DataExtractionAppendStringSlices creates a new slice by appending all elements of the provided slices. +// AppendStringSlices creates a new slice by appending all elements of the provided slices. // // Tags: // - @displayName: Append String Slices @@ -130,7 +130,7 @@ func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions // // Returns: // - result: a new slice with all elements appended. -func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []string) []string { +func AppendStringSlices(slice1, slice2, slice3, slice4, slice5 []string) []string { var result []string // Append all elements from each slice to the result slice @@ -143,7 +143,7 @@ func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []s return result } -// DataExtractionDownloadGithubFileContent downloads file content from github and returns checksum and content. +// DownloadGithubFileContent downloads file content from github and returns checksum and content. // // Tags: // - @displayName: Download Github File Content @@ -158,7 +158,7 @@ func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []s // Returns: // - checksum: checksum of file. // - content: content of file. -func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOwner string, +func DownloadGithubFileContent(githubRepoName string, githubRepoOwner string, githubRepoBranch string, gihubFilePath string, githubAccessToken string) (checksum string, content string) { // Create a new GitHub client and context. @@ -188,7 +188,7 @@ func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOw return checksum, content } -// DataExtractionGetLocalFileContent reads local file and returns checksum and content. +// GetLocalFileContent reads local file and returns checksum and content. // // Tags: // - @displayName: Get Local File Content @@ -199,7 +199,7 @@ func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOw // Returns: // - checksum: checksum of file. // - content: content of file. -func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, content string) { +func GetLocalFileContent(localFilePath string) (checksum string, content string) { // Read file from local path. contentBytes, err := os.ReadFile(localFilePath) if err != nil { @@ -228,7 +228,7 @@ func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, c return checksum, content } -// DataExtractionGetDocumentType returns the document type of a file. +// GetDocumentType returns the document type of a file. // // Tags: // - @displayName: Get Document Type @@ -238,7 +238,7 @@ func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, c // // Returns: // - documentType: file extension. -func DataExtractionGetDocumentType(filePath string) (documentType string) { +func GetDocumentType(filePath string) (documentType string) { // Extract the file extension from the file path and remove the leading period. fileExtension := filepath.Ext(filePath) documentType = strings.TrimPrefix(fileExtension, ".") @@ -246,7 +246,7 @@ func DataExtractionGetDocumentType(filePath string) (documentType string) { return documentType } -// DataExtractionLangchainSplitter splits content into chunks using langchain. +// LangchainSplitter splits content into chunks using langchain. // // Tags: // - @displayName: Split Content @@ -259,7 +259,7 @@ func DataExtractionGetDocumentType(filePath string) (documentType string) { // // Returns: // - output: chunks as an slice of strings. -func DataExtractionLangchainSplitter(content string, documentType string, chunkSize int, chunkOverlap int) (output []string) { +func LangchainSplitter(content string, documentType string, chunkSize int, chunkOverlap int) (output []string) { output = []string{} var splittedChunks []schema.Document var err error @@ -334,7 +334,7 @@ func DataExtractionLangchainSplitter(content string, documentType string, chunkS return output } -// DataExtractionGenerateDocumentTree generates a tree structure from the document chunks. +// GenerateDocumentTree generates a tree structure from the document chunks. // // Tags: // - @displayName: Document Tree @@ -352,7 +352,7 @@ func DataExtractionLangchainSplitter(content string, documentType string, chunkS // // Returns: // - documentData: tree structure of the document. -func DataExtractionGenerateDocumentTree(documentName string, documentId string, documentChunks []string, +func GenerateDocumentTree(documentName string, documentId string, documentChunks []string, embeddingsDimensions int, getSummary bool, getKeywords bool, numKeywords int, chunkSize int, numLlmWorkers int) (returnedDocumentData []sharedtypes.DbData) { logging.Log.Debugf(internalstates.Ctx, "Processing document: %s with %v leaf chunks \n", documentName, len(documentChunks)) diff --git a/pkg/externalfunctions/dataextraction_test.go b/pkg/externalfunctions/dataextraction_test.go index 4e7c129..c0e3208 100644 --- a/pkg/externalfunctions/dataextraction_test.go +++ b/pkg/externalfunctions/dataextraction_test.go @@ -7,7 +7,7 @@ import ( "testing" ) -func TestDataExtractionGetDocumentType(t *testing.T) { +func TestGetDocumentType(t *testing.T) { tests := []struct { fileName string expected string @@ -22,14 +22,14 @@ func TestDataExtractionGetDocumentType(t *testing.T) { } for _, test := range tests { - actual := DataExtractionGetDocumentType(test.fileName) + actual := GetDocumentType(test.fileName) if actual != test.expected { t.Errorf("GetFileExtension(%s): expected %s, actual %s", test.fileName, test.expected, actual) } } } -func TestDataExtractionGetLocalFileContent(t *testing.T) { +func TestGetLocalFileContent(t *testing.T) { // Create a temporary file for testing. tempFile, err := os.CreateTemp("", "testfile") if err != nil { @@ -54,7 +54,7 @@ func TestDataExtractionGetLocalFileContent(t *testing.T) { expectedChecksum := hex.EncodeToString(hash.Sum(nil)) // Call the function with the test file. - actualChecksum, actualContent := DataExtractionGetLocalFileContent(tempFile.Name()) + actualChecksum, actualContent := GetLocalFileContent(tempFile.Name()) // Check if the actual checksum matches the expected checksum. if actualChecksum != expectedChecksum { @@ -67,7 +67,7 @@ func TestDataExtractionGetLocalFileContent(t *testing.T) { } } -func TestDataExtractionAppendStringSlices(t *testing.T) { +func TestAppendStringSlices(t *testing.T) { tests := []struct { slice1 []string slice2 []string @@ -83,7 +83,7 @@ func TestDataExtractionAppendStringSlices(t *testing.T) { } for _, test := range tests { - actual := DataExtractionAppendStringSlices(test.slice1, test.slice2, test.slice3, test.slice4, test.slice5) + actual := AppendStringSlices(test.slice1, test.slice2, test.slice3, test.slice4, test.slice5) if len(actual) != len(test.expected) { t.Errorf("expected length %d, got %d", len(test.expected), len(actual)) } diff --git a/pkg/externalfunctions/externalfunctions.go b/pkg/externalfunctions/externalfunctions.go index 6c2149d..857aef8 100644 --- a/pkg/externalfunctions/externalfunctions.go +++ b/pkg/externalfunctions/externalfunctions.go @@ -39,16 +39,16 @@ var ExternalFunctionsMap = map[string]interface{}{ "AnsysGPTGetSystemPrompt": AnsysGPTGetSystemPrompt, // data extraction - "DataExtractionGetGithubFilesToExtract": DataExtractionGetGithubFilesToExtract, - "DataExtractionGetLocalFilesToExtract": DataExtractionGetLocalFilesToExtract, - "DataExtractionAppendStringSlices": DataExtractionAppendStringSlices, - "DataExtractionDownloadGithubFileContent": DataExtractionDownloadGithubFileContent, - "DataExtractionGetLocalFileContent": DataExtractionGetLocalFileContent, - "DataExtractionGetDocumentType": DataExtractionGetDocumentType, - "DataExtractionLangchainSplitter": DataExtractionLangchainSplitter, - "DataExtractionGenerateDocumentTree": DataExtractionGenerateDocumentTree, - "DataExtractionAddDataRequest": DataExtractionAddDataRequest, - "DataExtractionCreateCollectionRequest": DataExtractionCreateCollectionRequest, + "GetGithubFilesToExtract": GetGithubFilesToExtract, + "GetLocalFilesToExtract": GetLocalFilesToExtract, + "AppendStringSlices": AppendStringSlices, + "DownloadGithubFileContent": DownloadGithubFileContent, + "GetLocalFileContent": GetLocalFileContent, + "GetDocumentType": GetDocumentType, + "LangchainSplitter": LangchainSplitter, + "GenerateDocumentTree": GenerateDocumentTree, + "AddDataRequest": AddDataRequest, + "CreateCollectionRequest": CreateCollectionRequest, // generic "AssignStringToString": AssignStringToString, diff --git a/pkg/externalfunctions/knowledgedb.go b/pkg/externalfunctions/knowledgedb.go index b57afd5..0fb094e 100644 --- a/pkg/externalfunctions/knowledgedb.go +++ b/pkg/externalfunctions/knowledgedb.go @@ -707,7 +707,7 @@ func CreateDbFilter( return filters } -// DataExtractionAddDataRequest sends a request to the add_data endpoint. +// AddDataRequest sends a request to the add_data endpoint. // // Tags: // - @displayName: Add Data @@ -715,7 +715,7 @@ func CreateDbFilter( // Parameters: // - collectionName: name of the collection the request is sent to. // - data: the data to add. -func DataExtractionAddDataRequest(collectionName string, documentData []sharedtypes.DbData) { +func AddDataRequest(collectionName string, documentData []sharedtypes.DbData) { // Create the AddDataInput object requestObject := sharedtypes.DbAddDataInput{ CollectionName: collectionName, @@ -739,14 +739,14 @@ func DataExtractionAddDataRequest(collectionName string, documentData []sharedty return } -// DataExtractionCreateCollectionRequest sends a request to the collection endpoint. +// CreateCollectionRequest sends a request to the collection endpoint. // // Tags: // - @displayName: Create Collection // // Parameters: // - collectionName: the name of the collection to create. -func DataExtractionCreateCollectionRequest(collectionName string) { +func CreateCollectionRequest(collectionName string) { // Create the CreateCollectionInput object requestObject := sharedtypes.DbCreateCollectionInput{ CollectionName: collectionName,