Skip to content

Commit

Permalink
Add benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
Comet0322 committed Dec 10, 2024
1 parent 16a5ef1 commit 0c4ee6d
Show file tree
Hide file tree
Showing 2 changed files with 301 additions and 0 deletions.
64 changes: 64 additions & 0 deletions benchmark/data/all_benchmark_data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -715,3 +715,67 @@ fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,2,8645.314453125,8645.314
fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,4,12184.330078125,12184.330078125,12184.330078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1
fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,8,19262.361328125,19262.361328125,19262.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1
fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,16,33418.42578125,33418.42578125,33418.42578125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1
rope_paper,liger,forward,speed,ms,H,hidden size,512,0.027648000046610832,0.027648000046610832,0.028672000393271446,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:14,0.3.1
rope_paper,liger,forward,speed,ms,H,hidden size,2048,0.1515520066022873,0.15052799880504608,0.15360000729560852,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:14,0.3.1
rope_paper,liger,forward,speed,ms,H,hidden size,8192,0.5099520087242126,0.5079039931297302,0.5120000243186951,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:14,0.3.1
rope_paper,huggingface,forward,speed,ms,H,hidden size,512,0.12800000607967377,0.12492799758911133,0.13209599256515503,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:16,0.3.1
rope_paper,huggingface,forward,speed,ms,H,hidden size,2048,0.17203199863433838,0.17100800573825836,0.17203199863433838,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:16,0.3.1
rope_paper,huggingface,forward,speed,ms,H,hidden size,8192,0.5396479964256287,0.5386239886283875,0.5416960120201111,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:16,0.3.1
rope_paper,liger,backward,speed,ms,H,hidden size,512,0.021503999829292297,0.01945599913597107,0.02457600086927414,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:18,0.3.1
rope_paper,liger,backward,speed,ms,H,hidden size,2048,0.13926400244235992,0.1382399946451187,0.14028799533843994,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:18,0.3.1
rope_paper,liger,backward,speed,ms,H,hidden size,8192,0.49561598896980286,0.4935680031776428,0.4976640045642853,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:18,0.3.1
rope_paper,huggingface,backward,speed,ms,H,hidden size,512,0.22732800245285034,0.22466561198234558,0.2314240038394928,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:20,0.3.1
rope_paper,huggingface,backward,speed,ms,H,hidden size,2048,0.20787200331687927,0.20684799551963806,0.20787200331687927,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:20,0.3.1
rope_paper,huggingface,backward,speed,ms,H,hidden size,8192,0.7290880084037781,0.7290880084037781,0.7301120162010193,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:20,0.3.1
rope_paper,liger,full,speed,ms,H,hidden size,512,0.14233599603176117,0.10444799810647964,0.14622725546360016,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:22,0.3.1
rope_paper,liger,full,speed,ms,H,hidden size,2048,0.28672000765800476,0.28467199206352234,0.2887679934501648,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:22,0.3.1
rope_paper,liger,full,speed,ms,H,hidden size,8192,1.001471996307373,0.9983999729156494,1.0045440196990967,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:22,0.3.1
rope_paper,huggingface,full,speed,ms,H,hidden size,512,0.44441598653793335,0.4413439929485321,0.45977601408958435,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:24,0.3.1
rope_paper,huggingface,full,speed,ms,H,hidden size,2048,0.4249599874019623,0.42393600940704346,0.4280320107936859,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:24,0.3.1
rope_paper,huggingface,full,speed,ms,H,hidden size,8192,1.2636159658432007,1.2625919580459595,1.265663981437683,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:24,0.3.1
rope_paper,liger,full,memory,MB,H,hidden size,512,5.25,5.25,5.25,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:25,0.3.1
rope_paper,liger,full,memory,MB,H,hidden size,2048,21.0,21.0,21.0,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:25,0.3.1
rope_paper,liger,full,memory,MB,H,hidden size,8192,84.0,84.0,84.0,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:25,0.3.1
rope_paper,huggingface,full,memory,MB,H,hidden size,512,14.3125,14.3125,14.3125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:26,0.3.1
rope_paper,huggingface,full,memory,MB,H,hidden size,2048,57.25,57.25,57.25,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:26,0.3.1
rope_paper,huggingface,full,memory,MB,H,hidden size,8192,229.0,229.0,229.0,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:26,0.3.1
rope_paper,liger,forward,speed,ms,T,sequence length,1024,0.2836480140686035,0.2815999984741211,0.28569599986076355,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:37,0.3.1
rope_paper,liger,forward,speed,ms,T,sequence length,2048,0.5089280009269714,0.506879985332489,0.5120000243186951,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:37,0.3.1
rope_paper,liger,forward,speed,ms,T,sequence length,4096,0.9666560292243958,0.9646080136299133,0.9697279930114746,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:37,0.3.1
rope_paper,liger,forward,speed,ms,T,sequence length,8192,1.8821120262145996,1.8800640106201172,1.8851840496063232,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:37,0.3.1
rope_paper,liger,forward,speed,ms,T,sequence length,16384,3.7099521160125732,3.7058560848236084,3.7130239009857178,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:37,0.3.1
rope_paper,huggingface,forward,speed,ms,T,sequence length,1024,0.289792001247406,0.2887679934501648,0.289792001247406,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:48,0.3.1
rope_paper,huggingface,forward,speed,ms,T,sequence length,2048,0.5396479964256287,0.5386239886283875,0.5416960120201111,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:48,0.3.1
rope_paper,huggingface,forward,speed,ms,T,sequence length,4096,1.0240000486373901,1.0219520330429077,1.026047945022583,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:48,0.3.1
rope_paper,huggingface,forward,speed,ms,T,sequence length,8192,1.9967999458312988,1.994752049446106,1.9988479614257812,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:48,0.3.1
rope_paper,huggingface,forward,speed,ms,T,sequence length,16384,3.9383039474487305,3.935231924057007,3.940351963043213,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:15:48,0.3.1
rope_paper,liger,backward,speed,ms,T,sequence length,1024,0.2682879865169525,0.2662400007247925,0.27033600211143494,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:00,0.3.1
rope_paper,liger,backward,speed,ms,T,sequence length,2048,0.49663999676704407,0.49459201097488403,0.4986880123615265,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:00,0.3.1
rope_paper,liger,backward,speed,ms,T,sequence length,4096,0.9523199796676636,0.9502720236778259,0.9553920030593872,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:00,0.3.1
rope_paper,liger,backward,speed,ms,T,sequence length,8192,1.8626559972763062,1.8595839738845825,1.8657280206680298,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:00,0.3.1
rope_paper,liger,backward,speed,ms,T,sequence length,16384,3.680255889892578,3.676774501800537,3.684351921081543,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:00,0.3.1
rope_paper,huggingface,backward,speed,ms,T,sequence length,1024,0.37068799138069153,0.3696640133857727,0.37171199917793274,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:11,0.3.1
rope_paper,huggingface,backward,speed,ms,T,sequence length,2048,0.7311360239982605,0.7301120162010193,0.7321599721908569,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:11,0.3.1
rope_paper,huggingface,backward,speed,ms,T,sequence length,4096,1.3957120180130005,1.3946880102157593,1.3967360258102417,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:11,0.3.1
rope_paper,huggingface,backward,speed,ms,T,sequence length,8192,2.751487970352173,2.7494399547576904,2.7535359859466553,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:11,0.3.1
rope_paper,huggingface,backward,speed,ms,T,sequence length,16384,5.410816192626953,5.40774393081665,5.413887977600098,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:11,0.3.1
rope_paper,liger,full,speed,ms,T,sequence length,1024,0.5478399991989136,0.5457919836044312,0.5509120225906372,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:22,0.3.1
rope_paper,liger,full,speed,ms,T,sequence length,2048,1.0024960041046143,1.0004479885101318,1.0061824321746826,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:22,0.3.1
rope_paper,liger,full,speed,ms,T,sequence length,4096,1.9169280529022217,1.913856029510498,1.921023964881897,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:22,0.3.1
rope_paper,liger,full,speed,ms,T,sequence length,8192,3.742719888687134,3.7396481037139893,3.74783992767334,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:22,0.3.1
rope_paper,liger,full,speed,ms,T,sequence length,16384,7.387135982513428,7.383449554443359,7.389798164367676,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:22,0.3.1
rope_paper,huggingface,full,speed,ms,T,sequence length,1024,0.6563839912414551,0.6553599834442139,0.6574079990386963,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:33,0.3.1
rope_paper,huggingface,full,speed,ms,T,sequence length,2048,1.264639973640442,1.2636159658432007,1.265663981437683,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:33,0.3.1
rope_paper,huggingface,full,speed,ms,T,sequence length,4096,2.411520004272461,2.4094719886779785,2.412544012069702,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:33,0.3.1
rope_paper,huggingface,full,speed,ms,T,sequence length,8192,4.7472639083862305,4.745215892791748,4.750336170196533,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:33,0.3.1
rope_paper,huggingface,full,speed,ms,T,sequence length,16384,9.334783554077148,9.330893516540527,9.336832046508789,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:33,0.3.1
rope_paper,liger,full,memory,MB,T,sequence length,1024,42.0,42.0,42.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:42,0.3.1
rope_paper,liger,full,memory,MB,T,sequence length,2048,84.0,84.0,84.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:42,0.3.1
rope_paper,liger,full,memory,MB,T,sequence length,4096,168.0,168.0,168.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:42,0.3.1
rope_paper,liger,full,memory,MB,T,sequence length,8192,336.0,336.0,336.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:42,0.3.1
rope_paper,liger,full,memory,MB,T,sequence length,16384,672.0,672.0,672.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:42,0.3.1
rope_paper,huggingface,full,memory,MB,T,sequence length,1024,114.5,114.5,114.5,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:51,0.3.1
rope_paper,huggingface,full,memory,MB,T,sequence length,2048,229.0,229.0,229.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:51,0.3.1
rope_paper,huggingface,full,memory,MB,T,sequence length,4096,458.0,458.0,458.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:51,0.3.1
rope_paper,huggingface,full,memory,MB,T,sequence length,8192,916.0,916.0,916.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:51,0.3.1
rope_paper,huggingface,full,memory,MB,T,sequence length,16384,1832.0,1832.0,1832.0,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100 80GB PCIe,2024-12-10 21:16:51,0.3.1
Loading

0 comments on commit 0c4ee6d

Please sign in to comment.