diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1b90f6f89..81d48f3c8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -19,7 +19,7 @@ jobs: - name: Run golangci-lint uses: golangci/golangci-lint-action@v6 with: - version: v1.60 + version: v1.61 args: --timeout=5m build-examples: name: Build Examples diff --git a/.golangci.yaml b/.golangci.yaml index cab783544..cf0d640b9 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -13,6 +13,8 @@ linters: - style - test - unused + enable: + - forbidigo disable: - gci # We don't use gci. - godox # We allow TODO lines. @@ -55,6 +57,12 @@ linters-settings: ignore-file-rules: - "**/*_test.go" - "**/mock/**/*.go" + forbidigo: + forbid: + - 'import "[^"]+/(util|common|helpers)"' + gosec: + excludes: + - G115 # https://github.com/securego/gosec/issues/1212 run: exclude-dirs: - 'exp' diff --git a/chains/sequential.go b/chains/sequential.go index 50b6d1b0a..af436f098 100644 --- a/chains/sequential.go +++ b/chains/sequential.go @@ -6,7 +6,8 @@ import ( "fmt" "strings" - "github.com/tmc/langchaingo/internal/util" + "github.com/tmc/langchaingo/internal/maputil" + "github.com/tmc/langchaingo/internal/setutil" "github.com/tmc/langchaingo/memory" "github.com/tmc/langchaingo/schema" ) @@ -42,11 +43,11 @@ func NewSequentialChain(chains []Chain, inputKeys []string, outputKeys []string, } func (c *SequentialChain) validateSeqChain() error { - knownKeys := util.ToSet(c.inputKeys) + knownKeys := setutil.ToSet(c.inputKeys) // Make sure memory keys don't collide with input keys memoryKeys := c.memory.MemoryVariables(context.Background()) - overlappingKeys := util.Intersection(memoryKeys, knownKeys) + overlappingKeys := setutil.Intersection(memoryKeys, knownKeys) if len(overlappingKeys) > 0 { return fmt.Errorf( "%w: input keys [%v] also exist in the memory keys: [%v] - please use input keys and memory keys that don't overlap", @@ -61,16 +62,16 @@ func (c *SequentialChain) validateSeqChain() error { for i, c := range c.chains { // Check that chain has input keys that are in knownKeys - missingKeys := util.Difference(c.GetInputKeys(), knownKeys) + missingKeys := setutil.Difference(c.GetInputKeys(), knownKeys) if len(missingKeys) > 0 { return fmt.Errorf( "%w: missing required input keys: [%v], only had: [%v]", - ErrChainInitialization, strings.Join(missingKeys, delimiter), strings.Join(util.ListKeys(knownKeys), delimiter), + ErrChainInitialization, strings.Join(missingKeys, delimiter), strings.Join(maputil.ListKeys(knownKeys), delimiter), ) } // Check that chain does not have output keys that are already in knownKeys - overlappingKeys := util.Intersection(c.GetOutputKeys(), knownKeys) + overlappingKeys := setutil.Intersection(c.GetOutputKeys(), knownKeys) if len(overlappingKeys) > 0 { return fmt.Errorf( "%w: chain at index %d has output keys that already exist: %v", diff --git a/embeddings/embedding.go b/embeddings/embedding.go index fe194ad0e..aaa478236 100644 --- a/embeddings/embedding.go +++ b/embeddings/embedding.go @@ -5,7 +5,7 @@ import ( "fmt" "strings" - "github.com/tmc/langchaingo/internal/util" + "github.com/tmc/langchaingo/internal/sliceutil" ) // NewEmbedder creates a new Embedder from the given EmbedderClient, with @@ -89,7 +89,7 @@ func BatchTexts(texts []string, batchSize int) [][]string { batchedTexts := make([][]string, 0, len(texts)/batchSize+1) for i := 0; i < len(texts); i += batchSize { - batchedTexts = append(batchedTexts, texts[i:util.MinInt([]int{i + batchSize, len(texts)})]) + batchedTexts = append(batchedTexts, texts[i:sliceutil.MinInt([]int{i + batchSize, len(texts)})]) } return batchedTexts diff --git a/internal/.h b/internal/.h new file mode 100644 index 000000000..5addd3592 --- /dev/null +++ b/internal/.h @@ -0,0 +1,401 @@ +backend: anthropic +messages: +- role: system + text: you are an expert go develop and tech lead at google +- role: human + text: "we dont like packages named util, fix this\n\n\n backend: anthropic\n messages: null\n model: claude-3-5-sonnet-20240620\n\n\n package mongodb\n \n import (\n \t\"context\"\n + \ \n \t\"go.mongodb.org/mongo-driver/mongo\"\n \t\"go.mongodb.org/mongo-driver/mongo/options\"\n + \ \t\"go.mongodb.org/mongo-driver/mongo/readpref\"\n )\n \n func NewClient(ctx + context.Context, url string) (*mongo.Client, error) {\n \tclient, err := mongo.Connect(ctx, + options.Client().ApplyURI(url))\n \tif err != nil {\n \t\treturn nil, err\n + \ \t}\n \n \terr = client.Ping(ctx, readpref.Primary())\n \tif err != nil {\n + \ \t\treturn nil, err\n \t}\n \n \treturn client, nil\n }\n\n\n package safeconv\n \n import \"math\"\n + \ \n // ToUint32 safely converts an int to uint32\n func ToUint32(n int) uint32 + {\n if n < 0 {\n return 0\n }\n if n > math.MaxUint32 + {\n return math.MaxUint32\n }\n return uint32(n)\n }\n \n + \ // ToUint safely converts an int to uint\n func ToUint(n int) uint {\n if + n < 0 {\n return 0\n }\n if uint64(n) > math.MaxUint {\n return + math.MaxUint\n }\n return uint(n)\n }\n \n // ToInt32 safely converts + an int to int32\n func ToInt32(n int) int32 {\n if n < 0 {\n return + 0\n }\n if n > math.MaxInt32 {\n return math.MaxInt32\n }\n + \ return int32(n)\n }\n\n package + util\n \n import (\n \t\"fmt\"\n \t\"io\"\n \t\"net/http\"\n \t\"strings\"\n + \ )\n \n // downloadImageData downloads the content from the given URL and returns + the\n // image type and data. The image type is the second part of the response's\n + \ // MIME (e.g. \"png\" from \"image/png\").\n func DownloadImageData(url string) + (string, []byte, error) {\n \tresp, err := http.Get(url) //nolint\n \tif err + != nil {\n \t\treturn \"\", nil, fmt.Errorf(\"failed to fetch image from url: + %w\", err)\n \t}\n \tdefer resp.Body.Close()\n \n \turlData, err := io.ReadAll(resp.Body)\n + \ \tif err != nil {\n \t\treturn \"\", nil, fmt.Errorf(\"failed to read image + bytes: %w\", err)\n \t}\n \n \tmimeType := resp.Header.Get(\"Content-Type\")\n + \ \n \tparts := strings.Split(mimeType, \"/\")\n \tif len(parts) != 2 {\n \t\treturn + \"\", nil, fmt.Errorf(\"invalid mime type %v\", mimeType)\n \t}\n \n \treturn + parts[1], urlData, nil\n }\n\n\n // Package + util contains general helper functions.\n package util\n \n // ToSet converts + a list to a set.\n func ToSet(list []string) map[string]struct{} {\n \tset := + make(map[string]struct{}, 0)\n \tfor _, v := range list {\n \t\tset[v] = struct{}{}\n + \ \t}\n \treturn set\n }\n \n // Difference returns the elements in list that + are not in set.\n func Difference(list []string, set map[string]struct{}) []string + {\n \tdiff := make([]string, 0)\n \tfor _, v := range list {\n \t\tif _, ok + := set[v]; !ok {\n \t\t\tdiff = append(diff, v)\n \t\t}\n \t}\n \treturn diff\n + \ }\n \n // Intersection returns the elements in list that are in set.\n func + Intersection(list []string, set map[string]struct{}) []string {\n \tintersection + := make([]string, 0)\n \tfor _, v := range list {\n \t\tif _, ok := set[v]; + ok {\n \t\t\tintersection = append(intersection, v)\n \t\t}\n \t}\n \treturn + intersection\n }\n \n func ListKeys[T any](m map[string]T) []string {\n \tkeys + := make([]string, 0, len(m))\n \tfor k := range m {\n \t\tkeys = append(keys, + k)\n \t}\n \treturn keys\n }\n \n // MinInt returns the minimum value in + nums.\n // If nums is empty, it returns 0.\n func MinInt(nums []int) int {\n + \ \tvar min int\n \tfor idx := 0; idx < len(nums); idx++ {\n \t\titem := nums[idx]\n + \ \t\tif idx == 0 {\n \t\t\tmin = item\n \t\t\tcontinue\n \t\t}\n \t\tif item + < min {\n \t\t\tmin = item\n \t\t}\n \t}\n \treturn min\n }\n\n\n package util\n \n import (\n \t\"testing\"\n + \ \n \t\"github.com/stretchr/testify/assert\"\n )\n \n func TestMinInt(t *testing.T) + {\n \tt.Parallel()\n \n \tcases := []struct {\n \t\tnums []int\n \t\texpected + int\n \t}{\n \t\t{\n \t\t\tnums: []int{1, 2, 3, 23, 34},\n \t\t\texpected: + 1,\n \t\t},\n \t\t{\n \t\t\tnums: []int{3, 2, 1, 34, 2213},\n \t\t\texpected: + 1,\n \t\t},\n \t\t{\n \t\t\tnums: nil,\n \t\t\texpected: 0,\n \t\t},\n + \ \t\t{\n \t\t\tnums: []int{},\n \t\t\texpected: 0,\n \t\t},\n \t}\n \n + \ \tfor _, tc := range cases {\n \t\tassert.Equal(t, tc.expected, MinInt(tc.nums))\n + \ \t}\n }\n\n\n" +- role: ai + text: |- + You're right, using a generic "util" package is often considered a poor practice in Go. Let's refactor this to use more specific and meaningful package names. Here's how we can restructure the code: + + 1. For `util/download.go`, we can create a new package called `imageutil`: + + ```go + // New file: internal/imageutil/download.go + package imageutil + + // ... (rest of the code remains the same) + ``` + + 2. For `util/util.go`, we can split its functions into more specific packages: + + ```go + // New file: internal/setutil/set.go + package setutil + + // ToSet converts a list to a set. + func ToSet(list []string) map[string]struct{} { + // ... (implementation) + } + + // Difference returns the elements in list that are not in set. + func Difference(list []string, set map[string]struct{}) []string { + // ... (implementation) + } + + // Intersection returns the elements in list that are in set. + func Intersection(list []string, set map[string]struct{}) []string { + // ... (implementation) + } + + // New file: internal/maputil/map.go + package maputil + + func ListKeys[T any](m map[string]T) []string { + // ... (implementation) + } + + // New file: internal/sliceutil/slice.go + package sliceutil + + // MinInt returns the minimum value in nums. + // If nums is empty, it returns 0. + func MinInt(nums []int) int { + // ... (implementation) + } + ``` + + 3. Update `util/util_test.go` to reflect the new package structure: + + ```go + // New file: internal/sliceutil/slice_test.go + package sliceutil + + import ( + "testing" + + "github.com/stretchr/testify/assert" + ) + + func TestMinInt(t *testing.T) { + // ... (test cases remain the same) + } + ``` + + 4. Remove the original `util` directory. + + 5. Update any imports in other parts of your project to use these new package names instead of the generic `util` package. + + This refactoring creates more focused and descriptive package names, making the code more organized and easier to navigate. Each package now has a clear, specific purpose, which is a better practice in Go development. +- role: human + text: |+ + output a bash script to do all of that + +- role: ai + text: |- + Here's a bash script that will perform the refactoring we discussed: + + ```bash + #!/bin/bash + + set -e + + # Create new directories + mkdir -p internal/imageutil internal/setutil internal/maputil internal/sliceutil + + # Move and rename files + mv internal/util/download.go internal/imageutil/download.go + sed -i 's/package util/package imageutil/' internal/imageutil/download.go + + # Split util.go into multiple files + awk '/ToSet/,/^}/' internal/util/util.go > internal/setutil/set.go + awk '/Difference/,/^}/' internal/util/util.go >> internal/setutil/set.go + awk '/Intersection/,/^}/' internal/util/util.go >> internal/setutil/set.go + sed -i '1ipackage setutil\n' internal/setutil/set.go + + awk '/ListKeys/,/^}/' internal/util/util.go > internal/maputil/map.go + sed -i '1ipackage maputil\n' internal/maputil/map.go + + awk '/MinInt/,/^}/' internal/util/util.go > internal/sliceutil/slice.go + sed -i '1ipackage sliceutil\n' internal/sliceutil/slice.go + + # Move and update test file + mv internal/util/util_test.go internal/sliceutil/slice_test.go + sed -i 's/package util/package sliceutil/' internal/sliceutil/slice_test.go + + # Remove old util directory + rm -rf internal/util + + # Update imports in the project + find . -type f -name "*.go" -exec sed -i 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/imageutil",g' {} + + find . -type f -name "*.go" -exec sed -i 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/setutil",g' {} + + find . -type f -name "*.go" -exec sed -i 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/maputil",g' {} + + find . -type f -name "*.go" -exec sed -i 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/sliceutil",g' {} + + + echo "Refactoring complete. Please review the changes and update any remaining imports manually." + ``` + + This script does the following: + + 1. Creates new directories for the refactored packages. + 2. Moves and renames the `download.go` file to the new `imageutil` package. + 3. Splits the `util.go` file into separate files for `setutil`, `maputil`, and `sliceutil`. + 4. Moves and updates the test file. + 5. Removes the old `util` directory. + 6. Attempts to update imports throughout the project. + + To use this script: + + 1. Save it as `refactor_util.sh` in the root directory of your project. + 2. Make it executable: `chmod +x refactor_util.sh` + 3. Run it: `./refactor_util.sh` + + Please note: + - This script assumes you're running it from the root of your project. + - It uses `sed` for text replacement, which may behave differently on macOS. If you're on macOS, you might need to modify the `sed` commands. + - Always review the changes after running the script and make any necessary manual adjustments. + - It's recommended to run this on a version-controlled codebase so you can easily review and revert changes if needed. +- role: human + text: |+ + is that macos compat + +- role: ai + text: |- + You're right to ask about macOS compatibility. The script as written is not fully compatible with macOS due to differences in the `sed` command between GNU sed (used in most Linux distributions) and BSD sed (used in macOS). + + Here's an updated version of the script that should work on both macOS and Linux: + + ```bash + #!/bin/bash + + set -e + + # Function to use the correct sed syntax + sedfix() { + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' "$@" + else + sed -i "$@" + fi + } + + # Create new directories + mkdir -p internal/imageutil internal/setutil internal/maputil internal/sliceutil + + # Move and rename files + mv internal/util/download.go internal/imageutil/download.go + sedfix 's/package util/package imageutil/' internal/imageutil/download.go + + # Split util.go into multiple files + awk '/ToSet/,/^}/' internal/util/util.go > internal/setutil/set.go + awk '/Difference/,/^}/' internal/util/util.go >> internal/setutil/set.go + awk '/Intersection/,/^}/' internal/util/util.go >> internal/setutil/set.go + sedfix '1ipackage setutil\ + ' internal/setutil/set.go + + awk '/ListKeys/,/^}/' internal/util/util.go > internal/maputil/map.go + sedfix '1ipackage maputil\ + ' internal/maputil/map.go + + awk '/MinInt/,/^}/' internal/util/util.go > internal/sliceutil/slice.go + sedfix '1ipackage sliceutil\ + ' internal/sliceutil/slice.go + + # Move and update test file + mv internal/util/util_test.go internal/sliceutil/slice_test.go + sedfix 's/package util/package sliceutil/' internal/sliceutil/slice_test.go + + # Remove old util directory + rm -rf internal/util + + # Update imports in the project + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/imageutil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/setutil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/maputil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/sliceutil",g' {} + + + echo "Refactoring complete. Please review the changes and update any remaining imports manually." + ``` + + The main changes in this version: + + 1. Added a `sedfix` function that detects the operating system and uses the appropriate `sed` syntax. + 2. Modified the `sed` commands to use this `sedfix` function. + 3. Adjusted the syntax for inserting text at the beginning of files to be compatible with both BSD and GNU sed. + + This script should now work on both macOS and Linux systems. As before, save it as `refactor_util.sh`, make it executable with `chmod +x refactor_util.sh`, and run it from your project's root directory. + + Remember to review all changes after running the script and make any necessary manual adjustments, especially for import statements that might not have been caught by the automatic replacements. +- role: human + text: |+ + sed: 1: "1ipackage setutil\ + ": command i expects \ followed by text + +- role: ai + text: |- + I apologize for the confusion. You're right, and thank you for pointing that out. The error occurs because the BSD sed (used in macOS) has a different syntax for the insert command. Let's modify the script to handle this difference more effectively: + + ```bash + #!/bin/bash + + set -e + + # Function to use the correct sed syntax + sedfix() { + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' "$@" + else + sed -i "$@" + fi + } + + # Function to insert text at the beginning of a file + insert_at_beginning() { + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' "1i\\ + $1 + " "$2" + else + sed -i "1i$1" "$2" + fi + } + + # Create new directories + mkdir -p internal/imageutil internal/setutil internal/maputil internal/sliceutil + + # Move and rename files + mv internal/util/download.go internal/imageutil/download.go + sedfix 's/package util/package imageutil/' internal/imageutil/download.go + + # Split util.go into multiple files + awk '/ToSet/,/^}/' internal/util/util.go > internal/setutil/set.go + awk '/Difference/,/^}/' internal/util/util.go >> internal/setutil/set.go + awk '/Intersection/,/^}/' internal/util/util.go >> internal/setutil/set.go + insert_at_beginning "package setutil" internal/setutil/set.go + + awk '/ListKeys/,/^}/' internal/util/util.go > internal/maputil/map.go + insert_at_beginning "package maputil" internal/maputil/map.go + + awk '/MinInt/,/^}/' internal/util/util.go > internal/sliceutil/slice.go + insert_at_beginning "package sliceutil" internal/sliceutil/slice.go + + # Move and update test file + mv internal/util/util_test.go internal/sliceutil/slice_test.go + sedfix 's/package util/package sliceutil/' internal/sliceutil/slice_test.go + + # Remove old util directory + rm -rf internal/util + + # Update imports in the project + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/imageutil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/setutil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/maputil",g' {} + + find . -type f -name "*.go" -exec sedfix 's,"github.com/tmc/langchaingo/internal/util","github.com/tmc/langchaingo/internal/sliceutil",g' {} + + + echo "Refactoring complete. Please review the changes and update any remaining imports manually." + ``` + + The main changes in this version: + + 1. Added a new function `insert_at_beginning` that handles the differences between BSD sed and GNU sed for inserting text at the beginning of a file. + 2. Used this new function to insert the package declarations at the beginning of each new file. + + This script should now work correctly on both macOS and Linux systems. As before: + + 1. Save it as `refactor_util.sh` in the root directory of your project. + 2. Make it executable: `chmod +x refactor_util.sh` + 3. Run it: `./refactor_util.sh` + + After running the script, please review all changes and make any necessary manual adjustments, especially for import statements that might not have been caught by the automatic replacements. +- role: human + text: |+ + find: sedfix: No such file or directory + +- role: ai + text: "I apologize for the confusion. The error occurs because the `find` command + is trying to execute `sedfix` as a command, which doesn't exist in the file system. + We need to modify our script to use the `sedfix` function within the `find` command. + Here's a corrected version of the script:\n\n```bash\n#!/bin/bash\n\nset -e\n\n# + Function to use the correct sed syntax\nsedfix() {\n if [[ \"$OSTYPE\" == \"darwin\"* + ]]; then\n sed -i '' \"$@\"\n else\n sed -i \"$@\"\n fi\n}\n\n# + Function to insert text at the beginning of a file\ninsert_at_beginning() {\n + \ if [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n sed -i '' \"1i\\\\\n$1\n\" + \"$2\"\n else\n sed -i \"1i$1\" \"$2\"\n fi\n}\n\n# Create new directories\nmkdir + -p internal/imageutil internal/setutil internal/maputil internal/sliceutil\n\n# + Move and rename files\nmv internal/util/download.go internal/imageutil/download.go\nsedfix + 's/package util/package imageutil/' internal/imageutil/download.go\n\n# Split + util.go into multiple files\nawk '/ToSet/,/^}/' internal/util/util.go > internal/setutil/set.go\nawk + '/Difference/,/^}/' internal/util/util.go >> internal/setutil/set.go\nawk '/Intersection/,/^}/' + internal/util/util.go >> internal/setutil/set.go\ninsert_at_beginning \"package + setutil\" internal/setutil/set.go\n\nawk '/ListKeys/,/^}/' internal/util/util.go + > internal/maputil/map.go\ninsert_at_beginning \"package maputil\" internal/maputil/map.go\n\nawk + '/MinInt/,/^}/' internal/util/util.go > internal/sliceutil/slice.go\ninsert_at_beginning + \"package sliceutil\" internal/sliceutil/slice.go\n\n# Move and update test file\nmv + internal/util/util_test.go internal/sliceutil/slice_test.go\nsedfix 's/package + util/package sliceutil/' internal/sliceutil/slice_test.go\n\n# Remove old util + directory\nrm -rf internal/util\n\n# Update imports in the project\nupdate_imports() + {\n local old_import=\"github.com/tmc/langchaingo/internal/util\"\n local + new_imports=(\n \"github.com/tmc/langchaingo/internal/imageutil\"\n \"github.com/tmc/langchaingo/internal/setutil\"\n + \ \"github.com/tmc/langchaingo/internal/maputil\"\n \"github.com/tmc/langchaingo/internal/sliceutil\"\n + \ )\n \n for new_import in \"${new_imports[@]}\"; do\n sedfix \"s,\\\"$old_import\\\",\\\"$new_import\\\",g\" + \"$1\"\n done\n}\n\nexport -f sedfix update_imports\nfind . -type f -name \"*.go\" + -exec bash -c 'update_imports \"$0\"' {} \\;\n\necho \"Refactoring complete. Please + review the changes and update any remaining imports manually.\"\n```\n\nThe main + changes in this version:\n\n1. Created an `update_imports` function that performs + all the import replacements for a single file.\n2. Used `export -f` to make the + `sedfix` and `update_imports` functions available to subshells.\n3. Modified the + `find` command to use `bash -c` to call our `update_imports` function for each + Go file.\n\nThis script should now work correctly on both macOS and Linux systems + without the \"No such file or directory\" error. As before:\n\n1. Save it as `refactor_util.sh` + in the root directory of your project.\n2. Make it executable: `chmod +x refactor_util.sh`\n3. + Run it: `./refactor_util.sh`\n\nAfter running the script, please review all changes + and make any necessary manual adjustments, especially for import statements that + might not have been caught by the automatic replacements." +model: claude-3-5-sonnet-20240620 diff --git a/internal/util/download.go b/internal/imageutil/download.go similarity index 97% rename from internal/util/download.go rename to internal/imageutil/download.go index af1a93218..4ad8330c3 100644 --- a/internal/util/download.go +++ b/internal/imageutil/download.go @@ -1,4 +1,4 @@ -package util +package imageutil import ( "fmt" diff --git a/internal/maputil/map.go b/internal/maputil/map.go new file mode 100644 index 000000000..f7e64dc1d --- /dev/null +++ b/internal/maputil/map.go @@ -0,0 +1,9 @@ +package maputil + +func ListKeys[T any](m map[string]T) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} diff --git a/internal/util/util.go b/internal/setutil/set.go similarity index 59% rename from internal/util/util.go rename to internal/setutil/set.go index 54013d2f4..a3bb0616b 100644 --- a/internal/util/util.go +++ b/internal/setutil/set.go @@ -1,5 +1,4 @@ -// Package util contains general helper functions. -package util +package setutil // ToSet converts a list to a set. func ToSet(list []string) map[string]struct{} { @@ -31,28 +30,3 @@ func Intersection(list []string, set map[string]struct{}) []string { } return intersection } - -func ListKeys[T any](m map[string]T) []string { - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - return keys -} - -// MinInt returns the minimum value in nums. -// If nums is empty, it returns 0. -func MinInt(nums []int) int { - var min int - for idx := 0; idx < len(nums); idx++ { - item := nums[idx] - if idx == 0 { - min = item - continue - } - if item < min { - min = item - } - } - return min -} diff --git a/internal/sliceutil/slice.go b/internal/sliceutil/slice.go new file mode 100644 index 000000000..5acc85aa1 --- /dev/null +++ b/internal/sliceutil/slice.go @@ -0,0 +1,18 @@ +package sliceutil + +// MinInt returns the minimum value in nums. +// If nums is empty, it returns 0. +func MinInt(nums []int) int { + var min int + for idx := 0; idx < len(nums); idx++ { + item := nums[idx] + if idx == 0 { + min = item + continue + } + if item < min { + min = item + } + } + return min +} diff --git a/internal/util/util_test.go b/internal/sliceutil/slice_test.go similarity index 96% rename from internal/util/util_test.go rename to internal/sliceutil/slice_test.go index e304cf752..15be575f8 100644 --- a/internal/util/util_test.go +++ b/internal/sliceutil/slice_test.go @@ -1,4 +1,4 @@ -package util +package sliceutil import ( "testing" diff --git a/llms/googleai/googleai.go b/llms/googleai/googleai.go index ee8e77c9b..46c357d19 100644 --- a/llms/googleai/googleai.go +++ b/llms/googleai/googleai.go @@ -10,7 +10,7 @@ import ( "strings" "github.com/google/generative-ai-go/genai" - "github.com/tmc/langchaingo/internal/util" + "github.com/tmc/langchaingo/internal/imageutil" "github.com/tmc/langchaingo/llms" "google.golang.org/api/iterator" ) @@ -187,7 +187,7 @@ func convertParts(parts []llms.ContentPart) ([]genai.Part, error) { case llms.BinaryContent: out = genai.Blob{MIMEType: p.MIMEType, Data: p.Data} case llms.ImageURLContent: - typ, data, err := util.DownloadImageData(p.URL) + typ, data, err := imageutil.DownloadImageData(p.URL) if err != nil { return nil, err } diff --git a/llms/googleai/vertex/vertex.go b/llms/googleai/vertex/vertex.go index 03000bb2f..676efaab5 100644 --- a/llms/googleai/vertex/vertex.go +++ b/llms/googleai/vertex/vertex.go @@ -13,7 +13,7 @@ import ( "strings" "cloud.google.com/go/vertexai/genai" - "github.com/tmc/langchaingo/internal/util" + "github.com/tmc/langchaingo/internal/imageutil" "github.com/tmc/langchaingo/llms" "google.golang.org/api/iterator" ) @@ -190,7 +190,7 @@ func convertParts(parts []llms.ContentPart) ([]genai.Part, error) { case llms.BinaryContent: out = genai.Blob{MIMEType: p.MIMEType, Data: p.Data} case llms.ImageURLContent: - typ, data, err := util.DownloadImageData(p.URL) + typ, data, err := imageutil.DownloadImageData(p.URL) if err != nil { return nil, err } diff --git a/llms/llamafile/llamafilellm.go b/llms/llamafile/llamafilellm.go index c90e726dc..8e9cc6088 100644 --- a/llms/llamafile/llamafilellm.go +++ b/llms/llamafile/llamafilellm.go @@ -3,7 +3,6 @@ package llamafile import ( "context" "errors" - "math" "github.com/tmc/langchaingo/callbacks" "github.com/tmc/langchaingo/llms" @@ -168,33 +167,19 @@ func makeLlamaOptionsFromOptions(input *llamafileclient.ChatRequest, opts llms.C // Initialize llamaOptions with values from opts streamValue := opts.StreamingFunc != nil - input.FrequencyPenalty = opts.FrequencyPenalty // Assuming FrequencyPenalty correlates to FrequencyPenalty; adjust if necessary - input.MinP = float64(opts.MinLength) // Assuming there's a direct correlation; adjust if necessary - input.Model = opts.Model // Assuming Model correlates to Model; adjust if necessary - input.NCtx = opts.N // Assuming N corresponds to NCtx; if not, adjust. - input.NPredict = opts.MaxTokens // Assuming MaxTokens correlates to NPredict; - input.PresencePenalty = opts.PresencePenalty // Assuming PresencePenalty correlates to PresencePenalty; - input.RepeatPenalty = opts.RepetitionPenalty // Assuming RepetitionPenalty correlates to RepeatPenalty; - input.Seed = uint32(max(0, min(opts.Seed, math.MaxUint32))) // Convert int to uint32 - input.Stop = opts.StopWords // Assuming StopWords correlates to Stop; - input.Stream = &streamValue // True if StreamingFunc provided; adjust logic as needed. - input.Temperature = opts.Temperature // Assuming Temperature correlates to Temperature for precision; - input.TopK = opts.TopK // Assuming TopK correlates to TopK; - input.TopP = opts.TopP // Assuming TopP correlates to TopP; + input.FrequencyPenalty = opts.FrequencyPenalty // Assuming FrequencyPenalty correlates to FrequencyPenalty; adjust if necessary + input.MinP = float64(opts.MinLength) // Assuming there's a direct correlation; adjust if necessary + input.Model = opts.Model // Assuming Model correlates to Model; adjust if necessary + input.NCtx = opts.N // Assuming N corresponds to NCtx; if not, adjust. + input.NPredict = opts.MaxTokens // Assuming MaxTokens correlates to NPredict; + input.PresencePenalty = opts.PresencePenalty // Assuming PresencePenalty correlates to PresencePenalty; + input.RepeatPenalty = opts.RepetitionPenalty // Assuming RepetitionPenalty correlates to RepeatPenalty; + input.Seed = uint32(opts.Seed) // Convert int to uint32 + input.Stop = opts.StopWords // Assuming StopWords correlates to Stop; + input.Stream = &streamValue // True if StreamingFunc provided; adjust logic as needed. + input.Temperature = opts.Temperature // Assuming Temperature correlates to Temperature for precision; + input.TopK = opts.TopK // Assuming TopK correlates to TopK; + input.TopP = opts.TopP // Assuming TopP correlates to TopP; return input } - -func max(a, b int) int { - if a > b { - return a - } - return b -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} diff --git a/llms/watsonx/watsonxllm.go b/llms/watsonx/watsonxllm.go index 8978c9ddc..42f00c272 100644 --- a/llms/watsonx/watsonxllm.go +++ b/llms/watsonx/watsonxllm.go @@ -112,19 +112,19 @@ func toWatsonxOptions(options *[]llms.CallOption) []wx.GenerateOption { o = append(o, wx.WithTopP(opts.TopP)) } if opts.TopK != -1 { - o = append(o, wx.WithTopK(uint(max(0, opts.TopK)))) + o = append(o, wx.WithTopK(uint(opts.TopK))) } if opts.Temperature != -1 { o = append(o, wx.WithTemperature(opts.Temperature)) } if opts.Seed != -1 { - o = append(o, wx.WithRandomSeed(uint(max(0, opts.Seed)))) + o = append(o, wx.WithRandomSeed(uint(opts.Seed))) } if opts.RepetitionPenalty != -1 { o = append(o, wx.WithRepetitionPenalty(opts.RepetitionPenalty)) } if opts.MaxTokens != -1 { - o = append(o, wx.WithMaxNewTokens(uint(max(0, opts.MaxTokens)))) + o = append(o, wx.WithMaxNewTokens(uint(opts.MaxTokens))) } if len(opts.StopWords) > 0 { o = append(o, wx.WithStopSequences(opts.StopWords)) @@ -143,10 +143,3 @@ func toWatsonxOptions(options *[]llms.CallOption) []wx.GenerateOption { return o } - -func max(a, b int) int { - if a > b { - return a - } - return b -} diff --git a/vectorstores/chroma/chroma.go b/vectorstores/chroma/chroma.go index 9be10e677..f252ddd2c 100644 --- a/vectorstores/chroma/chroma.go +++ b/vectorstores/chroma/chroma.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "math" chromago "github.com/amikos-tech/chroma-go" "github.com/amikos-tech/chroma-go/openai" @@ -140,7 +139,7 @@ func (s Store) SimilaritySearch(ctx context.Context, query string, numDocuments } filter := s.getNamespacedFilter(opts) - qr, queryErr := s.collection.Query(ctx, []string{query}, int32(max(0, min(numDocuments, math.MaxInt32))), filter, nil, s.includes) + qr, queryErr := s.collection.Query(ctx, []string{query}, safeIntToInt32(numDocuments), filter, nil, s.includes) if queryErr != nil { return nil, queryErr } @@ -214,16 +213,6 @@ func (s Store) getNamespacedFilter(opts vectorstores.Options) map[string]any { return map[string]any{"$and": []map[string]any{nameSpaceFilter, filter}} } -func max(a, b int) int { - if a > b { - return a - } - return b -} - -func min(a, b int) int { - if a < b { - return a - } - return b +func safeIntToInt32(n int) int32 { + return int32(max(0, n)) } diff --git a/vectorstores/pinecone/pinecone.go b/vectorstores/pinecone/pinecone.go index 94acc0706..34c326ee4 100644 --- a/vectorstores/pinecone/pinecone.go +++ b/vectorstores/pinecone/pinecone.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "errors" - "math" "github.com/google/uuid" "github.com/pinecone-io/go-pinecone/pinecone" @@ -160,7 +159,7 @@ func (s Store) SimilaritySearch(ctx context.Context, query string, numDocuments &ctx, &pinecone.QueryByVectorValuesRequest{ Vector: vector, - TopK: uint32(max(0, min(numDocuments, math.MaxUint32))), + TopK: safeIntToUint32(numDocuments), Filter: protoFilterStruct, IncludeMetadata: true, IncludeValues: true, @@ -247,16 +246,6 @@ func (s Store) createProtoStructFilter(filter any) (*structpb.Struct, error) { return &filterStruct, nil } -func max(a, b int) int { - if a > b { - return a - } - return b -} - -func min(a, b int) int { - if a < b { - return a - } - return b +func safeIntToUint32(n int) uint32 { + return uint32(max(0, n)) }