Skip to content

Commit

Permalink
complete leaderboard
Browse files Browse the repository at this point in the history
  • Loading branch information
winnyyyyyy committed Nov 15, 2023
1 parent f553f7c commit 1e88872
Showing 1 changed file with 236 additions and 59 deletions.
295 changes: 236 additions & 59 deletions leaderboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
<!-- each section uses an empty div as an anchor -->
<div id="body">
<br><br><br><br>
<h2>行业维度下准确性测评结果 Top 6</h2>
<h2>行业维度下准确性测评结果</h2>
<!-- NOTE(review): id="results" is also set on the other result tables in this file. HTML ids must be unique within a document; before renaming, check which CSS/JS references #results. -->
<table class="js-sort-table" id="results">
<thead>
<tr>
Expand Down Expand Up @@ -130,11 +130,59 @@ <h2>行业维度下准确性测评结果 Top 6</h2>
<td>45.6</td>
<td>50.9</td>
</tr>
<!-- Four model rows. Each row is a left-aligned model-name cell followed by nine numeric cells, in the column order set by this table's <thead>. In the first three rows the ninth cell equals the mean of the preceding eight, rounded to one decimal. -->
<!-- NOTE(review): this row is labelled "星火大模型", while the stability tables in this file use "星火认知大模型" - confirm which label is intended. -->
<tr>
<td style="text-align: left;"><strong>星火大模型</strong></td>
<td>56.4</td>
<td>53.6</td>
<td>51.3</td>
<td>47.4</td>
<td>48.2</td>
<td>38.8</td>
<td>54.6</td>
<td>47.7</td>
<td>49.8</td>
</tr>
<tr>
<td style="text-align: left;"><strong>商汤大模型</strong></td>
<td>52.4</td>
<td>51.6</td>
<td>49.3</td>
<td>46.1</td>
<td>48.4</td>
<td>53.1</td>
<td>42.7</td>
<td>45.5</td>
<td>48.6</td>
</tr>
<tr>
<td style="text-align: left;"><strong>从容大模型</strong></td>
<td>51.6</td>
<td>53.1</td>
<td>51.2</td>
<td>45.2</td>
<td>43.9</td>
<td>53.4</td>
<td>45.9</td>
<td>43.7</td>
<td>48.5</td>
</tr>
<!-- NOTE(review): in this row the mean of the first eight cells is 39.9, but the last cell reads 39.3 (identical to the cell before it). Possibly a copy/paste slip in either the last cell or one of the scores - verify against the source data. -->
<tr>
<td style="text-align: left;"><strong>通义千问</strong></td>
<td>42.8</td>
<td>42.4</td>
<td>36.6</td>
<td>39.2</td>
<td>39.9</td>
<td>36.2</td>
<td>42.5</td>
<td>39.3</td>
<td>39.3</td>
</tr>
</tbody>

</table>

<h2>能力维度下准确性测评结果 Top 6</h2>
<h2>能力维度下准确性测评结果</h2>
<!-- NOTE(review): id="results" is also set on the other result tables in this file. HTML ids must be unique within a document; before renaming, check which CSS/JS references #results. -->
<table class="js-sort-table" id="results">
<thead>
<tr>
Expand Down Expand Up @@ -216,10 +264,54 @@ <h2>能力维度下准确性测评结果 Top 6</h2>
<td>0</td>
<td>44.2</td>
</tr>
<!-- Four model rows. Each row is a left-aligned model-name cell followed by eight numeric cells, in the column order set by this table's <thead>. In every row here the eighth cell equals the mean of the preceding seven. -->
<!-- NOTE(review): 43.41 below is written with two decimals, while the other averages in these rows use at most one - confirm the intended precision. -->
<tr>
<td style="text-align: left;"><strong>星火大模型</strong></td>
<td>44.3</td>
<td>58.7</td>
<td>71.1</td>
<td>65.1</td>
<td>64</td>
<td>0.7</td>
<td>0</td>
<td>43.41</td>
</tr>
<tr>
<td style="text-align: left;"><strong>商汤大模型</strong></td>
<td>43.7</td>
<td>59.1</td>
<td>70.7</td>
<td>62.7</td>
<td>63.1</td>
<td>0.1</td>
<td>0</td>
<td>42.8</td>
</tr>
<tr>
<td style="text-align: left;"><strong>从容大模型</strong></td>
<td>43.0</td>
<td>55.6</td>
<td>68.2</td>
<td>61.3</td>
<td>58.9</td>
<td>0</td>
<td>0</td>
<td>41</td>
</tr>
<tr>
<td style="text-align: left;"><strong>通义千问</strong></td>
<td>32.6</td>
<td>57.8</td>
<td>72.4</td>
<td>64.7</td>
<td>61.8</td>
<td>0.5</td>
<td>0</td>
<td>41.4</td>
</tr>
</tbody>
</table>

<h2>行业维度下稳定性测评结果 Top 6</h2>
<h2>行业维度下稳定性测评结果</h2>
<!-- NOTE(review): id="results" is also set on the other result tables in this file. HTML ids must be unique within a document; before renaming, check which CSS/JS references #results. -->
<table class="js-sort-table" id="results">
<thead>
<tr>
Expand All @@ -236,6 +328,18 @@ <h2>行业维度下稳定性测评结果 Top 6</h2>
</tr>
</thead>
<tbody>
<!-- GPT3.5 row: model-name cell, eight integer score cells in <thead> column order, then their mean rounded to an integer (81). -->
<tr>
<td style="text-align: left;"><strong>GPT3.5</strong></td>
<td>89</td>
<td>80</td>
<td>81</td>
<td>81</td>
<td>80</td>
<td>81</td>
<td>78</td>
<td>79</td>
<td>81</td>
</tr>
<tr>
<td style="text-align: left;"><strong>GPT4</strong></td>
<td>92</td>
Expand All @@ -249,28 +353,28 @@ <h2>行业维度下稳定性测评结果 Top 6</h2>
<td>83</td>
</tr>
<tr>
<td style="text-align: left;"><strong>GPT3.5</strong></td>
<td>89</td>
<td>80</td>
<td>81</td>
<td>81</td>
<td>80</td>
<td>81</td>
<td>78</td>
<td>79</td>
<td>81</td>
<td style="text-align: left;"><strong>通义千问</strong></td>
<td>62</td>
<td>74</td>
<td>73</td>
<td>62</td>
<td>63</td>
<td>64</td>
<td>63</td>
<td>67</td>
<td>66</td>
</tr>
<tr>
<td style="text-align: left;"><strong>星火认知大模型</strong></td>
<td>67</td>
<td>71</td>
<td>76</td>
<td>75</td>
<td>76</td>
<td style="text-align: left;"><strong>文心一言</strong></td>
<td>68</td>
<td>79</td>
<td>75</td>
<td>73</td>
<td>63</td>
<td>69</td>
<td>63</td>
<td>62</td>
<td>72</td>
<td>66</td>
<td>63</td>
<td>66</td>
</tr>
<tr>
<td style="text-align: left;"><strong>ChatGLM</strong></td>
Expand All @@ -285,16 +389,28 @@ <h2>行业维度下稳定性测评结果 Top 6</h2>
<td>68</td>
</tr>
<tr>
<td style="text-align: left;"><strong>360智脑</strong></td>
<td>74</td>
<td>74</td>
<td>69</td>
<td style="text-align: left;"><strong>星火认知大模型</strong></td>
<td>67</td>
<td>71</td>
<td>69</td>
<td>76</td>
<td>75</td>
<td>76</td>
<td>68</td>
<td>74</td>
<td>72</td>
<td>79</td>
<td>75</td>
<td>73</td>
</tr>
<!-- Model row: name cell, eight integer score cells in <thead> column order, then their mean rounded to an integer (52). -->
<!-- NOTE(review): the label "MinMax" may be intended as "MiniMax" - confirm the vendor's spelling before changing this user-visible text. -->
<tr>
<td style="text-align: left;"><strong>MinMax</strong></td>
<td>61</td>
<td>53</td>
<td>42</td>
<td>51</td>
<td>38</td>
<td>60</td>
<td>66</td>
<td>47</td>
<td>52</td>
</tr>
<tr>
<td style="text-align: left;"><strong>天工大模型</strong></td>
Expand All @@ -308,10 +424,35 @@ <h2>行业维度下稳定性测评结果 Top 6</h2>
<td>74</td>
<td>70</td>
</tr>
<!-- Two model rows: name cell, eight integer score cells in <thead> column order, then their mean rounded to an integer. -->
<!-- NOTE(review): this table uses the label "云从大模型", while the accuracy tables in this file list "从容大模型" and no "云从大模型" - confirm whether both labels refer to the same entry and should match. -->
<tr>
<td style="text-align: left;"><strong>云从大模型</strong></td>
<td>69</td>
<td>63</td>
<td>69</td>
<td>59</td>
<td>68</td>
<td>67</td>
<td>66</td>
<td>63</td>
<td>66</td>
</tr>
<tr>
<td style="text-align: left;"><strong>360智脑</strong></td>
<td>74</td>
<td>74</td>
<td>69</td>
<td>71</td>
<td>69</td>
<td>76</td>
<td>68</td>
<td>74</td>
<td>72</td>
</tr>

</tbody>
</table>

<h2>能力维度下稳定性测评结果 Top 6</h2>
<h2>能力维度下稳定性测评结果</h2>
<!-- NOTE(review): id="results" is also set on the other result tables in this file. HTML ids must be unique within a document; before renaming, check which CSS/JS references #results. -->
<table class="js-sort-table" id="results">
<thead>
<tr>
Expand All @@ -328,6 +469,18 @@ <h2>能力维度下稳定性测评结果 Top 6</h2>
</tr>
</thead>
<tbody>
<!-- GPT3.5 row: model-name cell, eight score cells in <thead> column order, then their mean rounded to one decimal (85.5). -->
<tr>
<td style="text-align: left;"><strong>GPT3.5</strong></td>
<td>94.2</td>
<td>76.9</td>
<td>83.7</td>
<td>88.1</td>
<td>83.3</td>
<td>84.7</td>
<td>90.2</td>
<td>83.2</td>
<td>85.5</td>
</tr>
<tr>
<td style="text-align: left;"><strong>GPT4</strong></td>
<td>95.7</td>
Expand All @@ -341,16 +494,28 @@ <h2>能力维度下稳定性测评结果 Top 6</h2>
<td>87.7</td>
</tr>
<tr>
<td style="text-align: left;"><strong>GPT3.5</strong></td>
<td>94.2</td>
<td>76.9</td>
<td>83.7</td>
<td>88.1</td>
<td>83.3</td>
<td>84.7</td>
<td>90.2</td>
<td>83.2</td>
<td>85.5</td>
<td style="text-align: left;"><strong>通义千问</strong></td>
<td>96.9</td>
<td>45.6</td>
<td>45.5</td>
<td>86.5</td>
<td>91.7</td>
<td>82.5</td>
<td>88.6</td>
<td>75.7</td>
<td>76.6</td>
</tr>
<tr>
<td style="text-align: left;"><strong>文心一言</strong></td>
<td>84.1</td>
<td>56</td>
<td>64.7</td>
<td>73.6</td>
<td>63.6</td>
<td>74.2</td>
<td>79.8</td>
<td>62.2</td>
<td>69.8</td>
</tr>
<tr>
<td style="text-align: left;"><strong>星火认知大模型</strong></td>
Expand All @@ -365,28 +530,28 @@ <h2>能力维度下稳定性测评结果 Top 6</h2>
<td>76.8</td>
</tr>
<tr>
<td style="text-align: left;"><strong>通义千问</strong></td>
<td>96.9</td>
<td>45.6</td>
<td style="text-align: left;"><strong>MinMax</strong></td>
<td>68.4</td>
<td>50</td>
<td>70.6</td>
<td>45.5</td>
<td>86.5</td>
<td>91.7</td>
<td>82.5</td>
<td>88.6</td>
<td>75.7</td>
<td>76.6</td>
<td>50</td>
<td>58.4</td>
<td>60.7</td>
<td>46.2</td>
<td>56.2</td>
</tr>
<tr>
<td style="text-align: left;"><strong>360智脑</strong></td>
<td>87.3</td>
<td>64.7</td>
<td>65.4</td>
<td>77</td>
<td>68.8</td>
<td>83.4</td>
<td>82.5</td>
<td>70.8</td>
<td>75</td>
<td style="text-align: left;"><strong>天工大模型</strong></td>
<td>78.7</td>
<td>64.2</td>
<td>70</td>
<td>66.7</td>
<td>33.3</td>
<td>80</td>
<td>76.7</td>
<td>78.8</td>
<td>68.5</td>
</tr>
<tr>
<td style="text-align: left;"><strong>云从大模型</strong></td>
Expand All @@ -400,6 +565,18 @@ <h2>能力维度下稳定性测评结果 Top 6</h2>
<td>69.4</td>
<td>73.4</td>
</tr>
<!-- 360智脑 row: model-name cell, eight score cells in <thead> column order, then their mean (75). -->
<tr>
<td style="text-align: left;"><strong>360智脑</strong></td>
<td>87.3</td>
<td>64.7</td>
<td>65.4</td>
<td>77</td>
<td>68.8</td>
<td>83.4</td>
<td>82.5</td>
<td>70.8</td>
<td>75</td>
</tr>
</tbody>
</table>

Expand Down

0 comments on commit 1e88872

Please sign in to comment.