Skip to content

Commit

Permalink
fix: rename data extraction functions (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
laurasgkadri98 authored Sep 12, 2024
1 parent 93dca50 commit 3c2a9f9
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 36 deletions.
32 changes: 16 additions & 16 deletions pkg/externalfunctions/dataextraction.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"github.com/tmc/langchaingo/textsplitter"
)

// DataExtractionGetGithubFilesToExtract gets all files from github that need to be extracted.
// GetGithubFilesToExtract gets all files from github that need to be extracted.
//
// Tags:
// - @displayName: List Github Files
Expand All @@ -38,7 +38,7 @@ import (
//
// Returns:
// - githubFilesToExtract: github files to extract.
func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwner string,
func GetGithubFilesToExtract(githubRepoName string, githubRepoOwner string,
githubRepoBranch string, githubAccessToken string, githubFileExtensions []string,
githubFilteredDirectories []string, githubExcludedDirectories []string) (githubFilesToExtract []string) {

Expand Down Expand Up @@ -74,7 +74,7 @@ func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwne
return githubFilesToExtract
}

// DataExtractionGetLocalFilesToExtract gets all files from local that need to be extracted.
// GetLocalFilesToExtract gets all files from local that need to be extracted.
//
// Tags:
// - @displayName: List Local Files
Expand All @@ -87,7 +87,7 @@ func DataExtractionGetGithubFilesToExtract(githubRepoName string, githubRepoOwne
//
// Returns:
// - localFilesToExtract: local files to extract.
func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions []string,
func GetLocalFilesToExtract(localPath string, localFileExtensions []string,
localFilteredDirectories []string, localExcludedDirectories []string) (localFilesToExtract []string) {
// Check if the local path exists.
if _, err := os.Stat(localPath); os.IsNotExist(err) {
Expand Down Expand Up @@ -120,7 +120,7 @@ func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions
return *localFiles
}

// DataExtractionAppendStringSlices creates a new slice by appending all elements of the provided slices.
// AppendStringSlices creates a new slice by appending all elements of the provided slices.
//
// Tags:
// - @displayName: Append String Slices
Expand All @@ -130,7 +130,7 @@ func DataExtractionGetLocalFilesToExtract(localPath string, localFileExtensions
//
// Returns:
// - result: a new slice with all elements appended.
func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []string) []string {
func AppendStringSlices(slice1, slice2, slice3, slice4, slice5 []string) []string {
var result []string

// Append all elements from each slice to the result slice
Expand All @@ -143,7 +143,7 @@ func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []s
return result
}

// DataExtractionDownloadGithubFileContent downloads file content from github and returns checksum and content.
// DownloadGithubFileContent downloads file content from github and returns checksum and content.
//
// Tags:
// - @displayName: Download Github File Content
Expand All @@ -158,7 +158,7 @@ func DataExtractionAppendStringSlices(slice1, slice2, slice3, slice4, slice5 []s
// Returns:
// - checksum: checksum of file.
// - content: content of file.
func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOwner string,
func DownloadGithubFileContent(githubRepoName string, githubRepoOwner string,
githubRepoBranch string, gihubFilePath string, githubAccessToken string) (checksum string, content string) {

// Create a new GitHub client and context.
Expand Down Expand Up @@ -188,7 +188,7 @@ func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOw
return checksum, content
}

// DataExtractionGetLocalFileContent reads local file and returns checksum and content.
// GetLocalFileContent reads local file and returns checksum and content.
//
// Tags:
// - @displayName: Get Local File Content
Expand All @@ -199,7 +199,7 @@ func DataExtractionDownloadGithubFileContent(githubRepoName string, githubRepoOw
// Returns:
// - checksum: checksum of file.
// - content: content of file.
func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, content string) {
func GetLocalFileContent(localFilePath string) (checksum string, content string) {
// Read file from local path.
contentBytes, err := os.ReadFile(localFilePath)
if err != nil {
Expand Down Expand Up @@ -228,7 +228,7 @@ func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, c
return checksum, content
}

// DataExtractionGetDocumentType returns the document type of a file.
// GetDocumentType returns the document type of a file.
//
// Tags:
// - @displayName: Get Document Type
Expand All @@ -238,15 +238,15 @@ func DataExtractionGetLocalFileContent(localFilePath string) (checksum string, c
//
// Returns:
// - documentType: file extension.
func DataExtractionGetDocumentType(filePath string) (documentType string) {
func GetDocumentType(filePath string) (documentType string) {
	// filepath.Ext yields the suffix beginning at the final dot of the last
	// path element (or "" when there is no dot); dropping that leading dot
	// leaves the bare extension, which is used as the document type.
	documentType = strings.TrimPrefix(filepath.Ext(filePath), ".")
	return documentType
}

// DataExtractionLangchainSplitter splits content into chunks using langchain.
// LangchainSplitter splits content into chunks using langchain.
//
// Tags:
// - @displayName: Split Content
Expand All @@ -259,7 +259,7 @@ func DataExtractionGetDocumentType(filePath string) (documentType string) {
//
// Returns:
// - output: chunks as a slice of strings.
func DataExtractionLangchainSplitter(content string, documentType string, chunkSize int, chunkOverlap int) (output []string) {
func LangchainSplitter(content string, documentType string, chunkSize int, chunkOverlap int) (output []string) {
output = []string{}
var splittedChunks []schema.Document
var err error
Expand Down Expand Up @@ -334,7 +334,7 @@ func DataExtractionLangchainSplitter(content string, documentType string, chunkS
return output
}

// DataExtractionGenerateDocumentTree generates a tree structure from the document chunks.
// GenerateDocumentTree generates a tree structure from the document chunks.
//
// Tags:
// - @displayName: Document Tree
Expand All @@ -352,7 +352,7 @@ func DataExtractionLangchainSplitter(content string, documentType string, chunkS
//
// Returns:
// - documentData: tree structure of the document.
func DataExtractionGenerateDocumentTree(documentName string, documentId string, documentChunks []string,
func GenerateDocumentTree(documentName string, documentId string, documentChunks []string,
embeddingsDimensions int, getSummary bool, getKeywords bool, numKeywords int, chunkSize int, numLlmWorkers int) (returnedDocumentData []sharedtypes.DbData) {

logging.Log.Debugf(internalstates.Ctx, "Processing document: %s with %v leaf chunks \n", documentName, len(documentChunks))
Expand Down
12 changes: 6 additions & 6 deletions pkg/externalfunctions/dataextraction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"testing"
)

func TestDataExtractionGetDocumentType(t *testing.T) {
func TestGetDocumentType(t *testing.T) {
tests := []struct {
fileName string
expected string
Expand All @@ -22,14 +22,14 @@ func TestDataExtractionGetDocumentType(t *testing.T) {
}

for _, test := range tests {
actual := DataExtractionGetDocumentType(test.fileName)
actual := GetDocumentType(test.fileName)
if actual != test.expected {
t.Errorf("GetFileExtension(%s): expected %s, actual %s", test.fileName, test.expected, actual)
}
}
}

func TestDataExtractionGetLocalFileContent(t *testing.T) {
func TestGetLocalFileContent(t *testing.T) {
// Create a temporary file for testing.
tempFile, err := os.CreateTemp("", "testfile")
if err != nil {
Expand All @@ -54,7 +54,7 @@ func TestDataExtractionGetLocalFileContent(t *testing.T) {
expectedChecksum := hex.EncodeToString(hash.Sum(nil))

// Call the function with the test file.
actualChecksum, actualContent := DataExtractionGetLocalFileContent(tempFile.Name())
actualChecksum, actualContent := GetLocalFileContent(tempFile.Name())

// Check if the actual checksum matches the expected checksum.
if actualChecksum != expectedChecksum {
Expand All @@ -67,7 +67,7 @@ func TestDataExtractionGetLocalFileContent(t *testing.T) {
}
}

func TestDataExtractionAppendStringSlices(t *testing.T) {
func TestAppendStringSlices(t *testing.T) {
tests := []struct {
slice1 []string
slice2 []string
Expand All @@ -83,7 +83,7 @@ func TestDataExtractionAppendStringSlices(t *testing.T) {
}

for _, test := range tests {
actual := DataExtractionAppendStringSlices(test.slice1, test.slice2, test.slice3, test.slice4, test.slice5)
actual := AppendStringSlices(test.slice1, test.slice2, test.slice3, test.slice4, test.slice5)
if len(actual) != len(test.expected) {
t.Errorf("expected length %d, got %d", len(test.expected), len(actual))
}
Expand Down
20 changes: 10 additions & 10 deletions pkg/externalfunctions/externalfunctions.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ var ExternalFunctionsMap = map[string]interface{}{
"AnsysGPTGetSystemPrompt": AnsysGPTGetSystemPrompt,

// data extraction
"DataExtractionGetGithubFilesToExtract": DataExtractionGetGithubFilesToExtract,
"DataExtractionGetLocalFilesToExtract": DataExtractionGetLocalFilesToExtract,
"DataExtractionAppendStringSlices": DataExtractionAppendStringSlices,
"DataExtractionDownloadGithubFileContent": DataExtractionDownloadGithubFileContent,
"DataExtractionGetLocalFileContent": DataExtractionGetLocalFileContent,
"DataExtractionGetDocumentType": DataExtractionGetDocumentType,
"DataExtractionLangchainSplitter": DataExtractionLangchainSplitter,
"DataExtractionGenerateDocumentTree": DataExtractionGenerateDocumentTree,
"DataExtractionAddDataRequest": DataExtractionAddDataRequest,
"DataExtractionCreateCollectionRequest": DataExtractionCreateCollectionRequest,
"GetGithubFilesToExtract": GetGithubFilesToExtract,
"GetLocalFilesToExtract": GetLocalFilesToExtract,
"AppendStringSlices": AppendStringSlices,
"DownloadGithubFileContent": DownloadGithubFileContent,
"GetLocalFileContent": GetLocalFileContent,
"GetDocumentType": GetDocumentType,
"LangchainSplitter": LangchainSplitter,
"GenerateDocumentTree": GenerateDocumentTree,
"AddDataRequest": AddDataRequest,
"CreateCollectionRequest": CreateCollectionRequest,

// generic
"AssignStringToString": AssignStringToString,
Expand Down
8 changes: 4 additions & 4 deletions pkg/externalfunctions/knowledgedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -707,15 +707,15 @@ func CreateDbFilter(
return filters
}

// DataExtractionAddDataRequest sends a request to the add_data endpoint.
// AddDataRequest sends a request to the add_data endpoint.
//
// Tags:
// - @displayName: Add Data
//
// Parameters:
// - collectionName: name of the collection the request is sent to.
// - documentData: the data to add.
func DataExtractionAddDataRequest(collectionName string, documentData []sharedtypes.DbData) {
func AddDataRequest(collectionName string, documentData []sharedtypes.DbData) {
// Create the AddDataInput object
requestObject := sharedtypes.DbAddDataInput{
CollectionName: collectionName,
Expand All @@ -739,14 +739,14 @@ func DataExtractionAddDataRequest(collectionName string, documentData []sharedty
return
}

// DataExtractionCreateCollectionRequest sends a request to the collection endpoint.
// CreateCollectionRequest sends a request to the collection endpoint.
//
// Tags:
// - @displayName: Create Collection
//
// Parameters:
// - collectionName: the name of the collection to create.
func DataExtractionCreateCollectionRequest(collectionName string) {
func CreateCollectionRequest(collectionName string) {
// Create the CreateCollectionInput object
requestObject := sharedtypes.DbCreateCollectionInput{
CollectionName: collectionName,
Expand Down

0 comments on commit 3c2a9f9

Please sign in to comment.