diff --git a/src/dataset/load.py b/src/dataset/load.py index a62c139..f463dd9 100644 --- a/src/dataset/load.py +++ b/src/dataset/load.py @@ -21,9 +21,9 @@ # GBオーダー -load_dataset("cognitivecomputations/dolphin", "flan1m-alpaca-uncensored", trust_remote_code=True) -load_dataset("cognitivecomputations/dolphin", "flan5m-alpaca-uncensored", trust_remote_code=True) -load_dataset("Open-Orca/OpenOrca", trust_remote_code=True) +# load_dataset("cognitivecomputations/dolphin", "flan1m-alpaca-uncensored", trust_remote_code=True) +# load_dataset("cognitivecomputations/dolphin", "flan5m-alpaca-uncensored", trust_remote_code=True) +# load_dataset("Open-Orca/OpenOrca", trust_remote_code=True) # デカい # load_dataset("cc100", "en", trust_remote_code=True) @@ -33,9 +33,9 @@ # デカすぎる # TinyLlamaが使ってる、895 GB -load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True) +# load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True) # TinyLlamaが使ってる、311 GB -load_dataset("bigcode/starcoderdata", trust_remote_code=True) +# load_dataset("bigcode/starcoderdata", trust_remote_code=True) # 886 GB # load_dataset("EleutherAI/pile", "all", trust_remote_code=True) # load_dataset("oscar")