Skip to content

Commit

Permalink
Update src/dataset/load.py
Browse files Browse the repository at this point in the history
  • Loading branch information
yuiseki committed Mar 28, 2024
1 parent d2932d6 commit acf93fa
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/dataset/load.py
Original file line number | Diff line number | Diff line change
@@ -26,16 +26,16 @@
 load_dataset("Open-Orca/OpenOrca", trust_remote_code=True)

 # デカい
-load_dataset("cc100", "en", trust_remote_code=True)
-load_dataset("cc100", "ja", trust_remote_code=True)
-load_dataset("allenai/c4", "en", trust_remote_code=True)
-load_dataset("allenai/c4", "ja", trust_remote_code=True)
+# load_dataset("cc100", "en", trust_remote_code=True)
+# load_dataset("cc100", "ja", trust_remote_code=True)
+# load_dataset("allenai/c4", "en", trust_remote_code=True)
+# load_dataset("allenai/c4", "ja", trust_remote_code=True)

 # デカすぎる
 # TinyLlamaが使ってる、895 GB
-# load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
+load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
 # TinyLlamaが使ってる、311 GB
-# load_dataset("bigcode/starcoderdata", trust_remote_code=True)
+load_dataset("bigcode/starcoderdata", trust_remote_code=True)
 # 886 GB
 # load_dataset("EleutherAI/pile", "all", trust_remote_code=True)
 # load_dataset("oscar")
Expand Down

0 comments on commit acf93fa

Please sign in to comment.