Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix default seps in recursive splitter #81

Merged
merged 3 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/pr-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Check License Header
uses: apache/skywalking-eyes/[email protected]
Expand All @@ -16,6 +18,41 @@ jobs:
- name: Check Spell
uses: crate-ci/typos@master

- name: Check README.md and examples
  run: |-
    # Fetch the main branch so origin/main is available for the diff below
    git fetch origin main

    # Find only newly added directories containing go.mod compared to main branch.
    # The anchored pattern matches files named exactly "go.mod" (the previous
    # unanchored "go.mod" regex also matched any path merely containing it),
    # and `xargs -r` skips running dirname when nothing matched.
    ADDED_DIRS=$(git diff --diff-filter=A --name-only origin/main...HEAD | grep -E '(^|/)go\.mod$' | xargs -r -L1 dirname || true)
    echo "Newly added directories (compared to main): $ADDED_DIRS"

    # Nothing to validate when this PR adds no new module
    if [ -z "$ADDED_DIRS" ]; then
      echo "No new go.mod directories were added in this PR ✓"
      exit 0
    fi

    # Check if README.md exists in each new directory
    for dir in $ADDED_DIRS; do
      if [ ! -f "$dir/README.md" ]; then
        echo "Error: README.md not found in newly added module directory: $dir"
        echo "Please add a [README.md] file to the directory."
        echo "📢 You can refer to the following example: https://github.com/cloudwego/eino-ext/blob/main/components/tool/duckduckgo/README.md"
        exit 1
      fi
    done
    echo "All newly added go.mod directories have README.md files ✓"

    # Check if examples exist in each new directory
    for dir in $ADDED_DIRS; do
      if [ ! -d "$dir/examples" ]; then
        echo "Error: examples not found in $dir"
        echo "📢 examples directory is required for new components, please add some examples for your component usage."
        exit 1
      fi
    done
    echo "All newly added go.mod directories have examples ✓"

# golangci-lint:
# runs-on: ubuntu-latest
# steps:
Expand Down
2 changes: 1 addition & 1 deletion callbacks/langfuse/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.18
require (
github.com/bytedance/mockey v1.2.13
github.com/bytedance/sonic v1.12.7
github.com/cloudwego/eino v0.3.5
github.com/cloudwego/eino v0.3.7
github.com/cloudwego/eino-ext/libs/acl/langfuse v0.0.0-20250113033825-eb19b2b6b386
github.com/golang/mock v1.6.0
github.com/stretchr/testify v1.10.0
Expand Down
4 changes: 2 additions & 2 deletions callbacks/langfuse/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.5 h1:9PkAOX/phFifrGXkfl4L9rdecxOQJBJY1FtZqF4bz3c=
github.com/cloudwego/eino v0.3.5/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino-ext/libs/acl/langfuse v0.0.0-20250113033825-eb19b2b6b386 h1:dF//5iW+PCS8ZnZ0PwmO2enn3Oek++mbgB6dmaJAz6o=
github.com/cloudwego/eino-ext/libs/acl/langfuse v0.0.0-20250113033825-eb19b2b6b386/go.mod h1:77jqGUJZjxg+V/sJ8S6dd0JtRLO782yVWHmhuFgb9ig=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
Expand Down
2 changes: 1 addition & 1 deletion components/document/loader/file/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/cloudwego/eino-ext/components/document/loader/file
go 1.18

require (
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
github.com/stretchr/testify v1.9.0
)

Expand Down
4 changes: 2 additions & 2 deletions components/document/loader/file/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/loader/s3/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/aws/aws-sdk-go-v2/credentials v1.17.42
github.com/aws/aws-sdk-go-v2/service/s3 v1.66.2
github.com/bytedance/mockey v1.2.13
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
github.com/stretchr/testify v1.9.0
)

Expand Down
4 changes: 2 additions & 2 deletions components/document/loader/s3/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/loader/url/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/cloudwego/eino-ext/components/document/loader/url
go 1.18

require (
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
github.com/cloudwego/eino-ext/components/document/parser/html v0.0.0-20241224063832-9fbcc0e56c28
github.com/stretchr/testify v1.9.0
)
Expand Down
4 changes: 2 additions & 2 deletions components/document/loader/url/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino-ext/components/document/parser/html v0.0.0-20241224063832-9fbcc0e56c28 h1:Z1cWrlqxdc5IuPV1UcqoW2BGlFr7IQJHGwn7I3Tax0A=
github.com/cloudwego/eino-ext/components/document/parser/html v0.0.0-20241224063832-9fbcc0e56c28/go.mod h1:e+Hf9OyKXFxAoCTF3thTm2Sz8KDfJ/iiEOHOmADpxRI=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
Expand Down
2 changes: 1 addition & 1 deletion components/document/parser/html/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.18

require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
github.com/microcosm-cc/bluemonday v1.0.27
github.com/stretchr/testify v1.9.0
)
Expand Down
4 changes: 2 additions & 2 deletions components/document/parser/html/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/parser/pdf/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/cloudwego/eino-ext/components/document/parser/pdf
go 1.18

require (
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
github.com/dslipak/pdf v0.0.2
github.com/stretchr/testify v1.9.0
)
Expand Down
4 changes: 2 additions & 2 deletions components/document/parser/pdf/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/transformer/reranker/score/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/cloudwego/eino-ext/components/document/transformer/reranker/sc

go 1.18

require github.com/cloudwego/eino v0.3.4
require github.com/cloudwego/eino v0.3.7

require (
github.com/bytedance/sonic v1.12.2 // indirect
Expand Down
4 changes: 2 additions & 2 deletions components/document/transformer/reranker/score/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/transformer/splitter/html/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/cloudwego/eino-ext/components/document/transformer/splitter/ht
go 1.18

require (
github.com/cloudwego/eino v0.3.4
github.com/cloudwego/eino v0.3.7
golang.org/x/net v0.33.0
)

Expand Down
4 changes: 2 additions & 2 deletions components/document/transformer/splitter/html/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
2 changes: 1 addition & 1 deletion components/document/transformer/splitter/markdown/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/cloudwego/eino-ext/components/document/transformer/splitter/ma

go 1.18

require github.com/cloudwego/eino v0.3.4
require github.com/cloudwego/eino v0.3.7

require (
github.com/bytedance/sonic v1.12.2 // indirect
Expand Down
4 changes: 2 additions & 2 deletions components/document/transformer/splitter/markdown/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4
github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/eino v0.3.4 h1:trWw8lKU1t1b7PMKSW1GXEJ4H2rLiGWFyVoMJJ3pRDg=
github.com/cloudwego/eino v0.3.4/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/eino v0.3.7 h1:PE1yFaAPVenRhDl0x6N1U2rKrfZkSr1hKlcacO6P+VA=
github.com/cloudwego/eino v0.3.7/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
33 changes: 33 additions & 0 deletions components/document/transformer/splitter/recursive/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# recursive splitter

The recursive splitter splits text into chunks recursively, which is useful for breaking long text into smaller pieces.

`OverlapSize` in the config sets the length of the content overlapped from the previous chunk, which helps preserve that chunk's context.

## Usage

- Example: [examples/main.go](examples/main.go)
- Run the example: `cd examples && go run main.go`

```go
import (
	"context"
	"fmt"
	"os"

	"github.com/cloudwego/eino-ext/components/document/transformer/splitter/recursive"
	"github.com/cloudwego/eino/schema"
)

// main shows the minimal flow: build a recursive splitter, split one
// document, and print the content of every resulting chunk.
func main() {
	ctx := context.Background()

	// Configure the chunk size and the overlap carried over from the previous chunk.
	splitter, err := recursive.NewSplitter(ctx, &recursive.Config{
		ChunkSize:   1500,
		OverlapSize: 300,
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	docs, err := splitter.Transform(ctx, []*schema.Document{
		{Content: "test content"},
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Use the result so the snippet compiles: Go rejects unused variables.
	for _, doc := range docs {
		fmt.Println(doc.Content)
	}
}
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright 2024 CloudWeGo Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package main

import (
"context"
"fmt"
"os"

"github.com/cloudwego/eino-ext/components/document/transformer/splitter/recursive"
"github.com/cloudwego/eino/schema"
)

func main() {
ctx := context.Background()

splitter, err := recursive.NewSplitter(ctx, &recursive.Config{
ChunkSize: 1500,
OverlapSize: 300,
KeepType: recursive.KeepTypeNone,
})
if err != nil {
panic(err)
kuhahalong marked this conversation as resolved.
Show resolved Hide resolved
}

file := "./testdata/eino_readme.md"
kuhahalong marked this conversation as resolved.
Show resolved Hide resolved
data, err := os.ReadFile(file)
if err != nil {
panic(err)
}

docs, err := splitter.Transform(ctx, []*schema.Document{
{
Content: string(data),
},
})

if err != nil {
panic(err)
}

for idx, doc := range docs {
fmt.Printf("====== %02d ======\n", idx)
meguminnnnnnnnn marked this conversation as resolved.
Show resolved Hide resolved
fmt.Println(doc.Content)
}

}
Loading
Loading