Skip to content

Commit

Permalink
Update src/dataset/load.py
Browse files Browse the repository at this point in the history
  • Loading branch information
yuiseki committed Mar 28, 2024
1 parent acf93fa commit dc16faf
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/dataset/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@


# GBオーダー
-load_dataset("cognitivecomputations/dolphin", "flan1m-alpaca-uncensored", trust_remote_code=True)
-load_dataset("cognitivecomputations/dolphin", "flan5m-alpaca-uncensored", trust_remote_code=True)
-load_dataset("Open-Orca/OpenOrca", trust_remote_code=True)
+# load_dataset("cognitivecomputations/dolphin", "flan1m-alpaca-uncensored", trust_remote_code=True)
+# load_dataset("cognitivecomputations/dolphin", "flan5m-alpaca-uncensored", trust_remote_code=True)
+# load_dataset("Open-Orca/OpenOrca", trust_remote_code=True)

# デカい
# load_dataset("cc100", "en", trust_remote_code=True)
Expand All @@ -33,9 +33,9 @@

# デカすぎる
# TinyLlamaが使ってる、895 GB
-load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
+# load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
# TinyLlamaが使ってる、311 GB
-load_dataset("bigcode/starcoderdata", trust_remote_code=True)
+# load_dataset("bigcode/starcoderdata", trust_remote_code=True)
# 886 GB
# load_dataset("EleutherAI/pile", "all", trust_remote_code=True)
# load_dataset("oscar")
Expand Down

0 comments on commit dc16faf

Please sign in to comment.