gpt-4-1106-preview (few-shot, val) |
unknown |
100 |
128000 |
True |
576 ± 221 / 81 ± 28 |
1.00 |
86.37 ± 1.19 / 82.25 ± 2.73 |
52.13 ± 4.37 / 71.97 ± 3.44 |
37.26 ± 2.60 / 66.04 ± 1.95 |
69.61 ± 0.61 / 23.98 ± 1.17 |
68.38 ± 2.60 / 76.29 ± 1.93 |
74.78 ± 4.30 / 87.50 ± 2.14 |
12.5.1 |
12.5.1 |
12.5.1 |
12.5.1 |
12.5.1 |
12.5.1 |
meta-llama/Meta-Llama-3-70B (few-shot, val) |
70554 |
128 |
8192 |
True |
312 ± 55 / 177 ± 51 |
2.93 |
75.33 ± 3.06 / 62.43 ± 3.11 |
14.89 ± 3.28 / 51.55 ± 4.71 |
34.36 ± 2.87 / 64.02 ± 2.94 |
68.38 ± 0.30 / 21.55 ± 0.76 |
44.34 ± 2.87 / 58.16 ± 2.15 |
29.46 ± 2.11 / 50.86 ± 1.68 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
meta-llama/Meta-Llama-3-70B-Instruct (few-shot, val) |
70554 |
128 |
8317 |
True |
1,673 ± 583 / 275 ± 85 |
3.27 |
72.91 ± 3.48 / 68.22 ± 4.46 |
5.01 ± 4.64 / 50.69 ± 3.23 |
31.86 ± 1.86 / 60.66 ± 1.58 |
68.30 ± 0.30 / 21.80 ± 0.66 |
37.99 ± 4.28 / 53.36 ± 3.22 |
27.68 ± 4.21 / 60.39 ± 2.25 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
gpt-3.5-turbo-0613 (few-shot, val) |
unknown |
100 |
4095 |
True |
921 ± 293 / 113 ± 37 |
3.73 |
69.59 ± 4.54 / 54.49 ± 4.31 |
7.28 ± 4.10 / 52.96 ± 2.00 |
28.50 ± 1.79 / 50.29 ± 1.79 |
67.10 ± 0.30 / 19.43 ± 0.48 |
23.78 ± 2.81 / 42.70 ± 2.13 |
18.61 ± 6.00 / 61.33 ± 2.93 |
0.0.0 |
0.0.0 |
0.0.0 |
0.0.0 |
0.0.0 |
12.1.0 |
152334H/miqu-1-70b-sf (few-shot, val) |
68977 |
32 |
32889 |
True |
2,126 ± 676 / 319 ± 104 |
4.01 |
61.13 ± 3.80 / 50.79 ± 5.92 |
7.75 ± 5.66 / 46.83 ± 4.00 |
27.27 ± 4.08 / 60.91 ± 1.98 |
67.20 ± 0.49 / 20.53 ± 0.86 |
16.65 ± 3.21 / 37.70 ± 2.23 |
5.54 ± 2.19 / 35.62 ± 2.11 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
upstage/SOLAR-10.7B-v1.0 (few-shot) |
10732 |
32 |
4096 |
True |
3,780 ± 906 / 799 ± 261 |
4.04 |
62.08 ± 2.27 / 51.09 ± 4.15 |
7.58 ± 1.03 / 44.38 ± 3.90 |
29.66 ± 3.02 / 56.60 ± 2.22 |
66.11 ± 0.85 / 18.74 ± 0.90 |
11.88 ± 0.67 / 33.57 ± 0.49 |
7.64 ± 1.91 / 49.40 ± 1.45 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
meta-llama/Llama-2-70b-hf (few-shot, val) |
68977 |
32 |
4221 |
True |
1,892 ± 650 / 318 ± 105 |
4.05 |
63.25 ± 4.52 / 53.70 ± 3.09 |
0.00 ± 0.00 / 33.12 ± 0.74 |
32.65 ± 4.82 / 61.15 ± 3.96 |
66.93 ± 0.49 / 20.59 ± 0.94 |
16.19 ± 4.15 / 36.91 ± 3.32 |
3.95 ± 4.13 / 37.03 ± 2.20 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
meta-llama/Meta-Llama-3-8B (few-shot) |
8030 |
128 |
8192 |
True |
4,687 ± 1,121 / 967 ± 313 |
4.06 |
48.70 ± 3.02 / 34.52 ± 2.66 |
7.49 ± 2.51 / 43.40 ± 4.41 |
29.56 ± 5.47 / 55.53 ± 5.79 |
66.34 ± 1.09 / 19.13 ± 0.96 |
16.97 ± 1.01 / 37.63 ± 0.74 |
7.41 ± 3.26 / 52.13 ± 1.97 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
meta-llama/Meta-Llama-3-8B-Instruct (few-shot) |
8030 |
128 |
8192 |
True |
4,909 ± 1,215 / 978 ± 319 |
4.08 |
61.69 ± 2.17 / 41.25 ± 3.12 |
6.10 ± 1.61 / 48.74 ± 3.05 |
31.52 ± 2.08 / 58.96 ± 1.57 |
66.98 ± 1.04 / 19.84 ± 1.97 |
14.81 ± 1.23 / 34.74 ± 0.94 |
1.50 ± 1.22 / 48.33 ± 1.21 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
senseable/WestLake-7B-v2 (few-shot) |
7242 |
32 |
32768 |
False |
5,993 ± 1,028 / 1,742 ± 561 |
4.24 |
56.71 ± 1.98 / 46.71 ± 5.28 |
3.44 ± 2.02 / 50.18 ± 1.14 |
21.55 ± 2.79 / 54.79 ± 2.02 |
65.39 ± 0.80 / 18.24 ± 1.00 |
9.81 ± 0.85 / 32.28 ± 0.65 |
3.30 ± 2.81 / 44.40 ± 1.61 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
12.6.1 |
mlabonne/NeuralBeagle14-7B (few-shot, val) |
7242 |
32 |
8192 |
False |
2,549 ± 472 / 784 ± 245 |
4.27 |
49.86 ± 4.28 / 42.54 ± 5.03 |
1.26 ± 3.83 / 48.46 ± 2.37 |
22.48 ± 4.43 / 55.51 ± 2.89 |
65.60 ± 0.69 / 19.46 ± 0.80 |
10.13 ± 2.94 / 32.62 ± 2.20 |
12.90 ± 6.92 / 56.88 ± 3.57 |
9.3.2 |
9.3.2 |
12.5.2 |
9.3.2 |
9.3.2 |
12.1.0 |
AI-Sweden-Models/tyr (few-shot, val) |
7242 |
32 |
32768 |
False |
6,079 ± 1,051 / 1,760 ± 570 |
4.33 |
50.64 ± 5.79 / 43.64 ± 5.92 |
-0.06 ± 3.83 / 38.01 ± 2.85 |
23.36 ± 4.19 / 46.70 ± 2.79 |
63.80 ± 1.14 / 15.18 ± 1.69 |
9.94 ± 2.33 / 31.91 ± 2.10 |
15.83 ± 5.74 / 53.67 ± 3.46 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
Nexusflow/Starling-LM-7B-beta (few-shot) |
7242 |
32 |
8192 |
False |
5,876 ± 1,021 / 1,677 ± 546 |
4.39 |
49.20 ± 2.64 / 40.79 ± 4.46 |
4.45 ± 1.40 / 51.11 ± 0.87 |
24.61 ± 3.36 / 54.99 ± 2.36 |
63.74 ± 2.25 / 18.29 ± 1.40 |
9.81 ± 0.66 / 32.34 ± 0.54 |
1.14 ± 0.97 / 50.10 ± 0.82 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
mhenrichsen/hestenettetLM (few-shot) |
7242 |
32 |
32768 |
True |
5,160 ± 804 / 1,654 ± 516 |
4.39 |
50.82 ± 2.72 / 40.35 ± 4.51 |
0.99 ± 1.54 / 39.38 ± 3.81 |
25.74 ± 5.44 / 49.45 ± 5.29 |
61.72 ± 3.16 / 16.00 ± 1.82 |
10.43 ± 1.25 / 32.96 ± 0.81 |
3.94 ± 2.97 / 54.60 ± 1.62 |
12.5.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
mlabonne/AlphaMonarch-7B (few-shot, val) |
7242 |
32 |
8192 |
False |
5,340 ± 1,262 / 1,157 ± 375 |
4.42 |
50.85 ± 4.15 / 42.91 ± 5.02 |
1.80 ± 3.84 / 49.12 ± 2.51 |
20.23 ± 3.73 / 52.99 ± 2.11 |
64.46 ± 1.03 / 17.62 ± 1.10 |
9.92 ± 2.10 / 32.19 ± 1.69 |
9.09 ± 6.37 / 49.53 ± 5.40 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
alpindale/Mistral-7B-v0.2-hf (few-shot) |
7242 |
32 |
32768 |
True |
1,841 ± 297 / 651 ± 193 |
4.48 |
44.85 ± 3.44 / 36.23 ± 4.15 |
-0.69 ± 2.06 / 35.01 ± 1.14 |
25.52 ± 5.27 / 49.12 ± 5.22 |
61.54 ± 2.35 / 15.01 ± 1.61 |
10.42 ± 0.56 / 32.72 ± 0.36 |
4.46 ± 1.81 / 51.42 ± 2.60 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
mistralai/Mistral-7B-v0.1 (few-shot) |
7242 |
32 |
32768 |
True |
2,657 ± 524 / 880 ± 278 |
4.48 |
47.24 ± 2.54 / 37.77 ± 3.87 |
1.35 ± 1.70 / 39.37 ± 3.87 |
25.70 ± 5.36 / 49.31 ± 5.21 |
61.96 ± 3.10 / 16.11 ± 1.80 |
10.31 ± 1.06 / 32.74 ± 0.64 |
1.99 ± 2.95 / 54.48 ± 1.27 |
9.1.2 |
9.1.2 |
12.5.1 |
11.0.0 |
9.1.2 |
12.1.0 |
timpal0l/Mistral-7B-v0.1-flashback-v2 (few-shot) |
7242 |
32 |
32768 |
True |
5,054 ± 1,200 / 1,056 ± 339 |
4.53 |
36.47 ± 4.24 / 30.33 ± 3.70 |
2.54 ± 1.29 / 50.66 ± 0.62 |
18.66 ± 4.26 / 38.73 ± 3.66 |
63.68 ± 1.75 / 16.38 ± 1.24 |
6.94 ± 0.88 / 30.23 ± 0.46 |
8.30 ± 1.28 / 57.35 ± 0.75 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
12.5.3 |
meta-llama/Llama-2-70b-chat-hf (few-shot, val) |
68977 |
32 |
4221 |
True |
1,979 ± 621 / 320 ± 105 |
4.58 |
46.32 ± 5.10 / 35.77 ± 3.59 |
-3.31 ± 3.91 / 38.63 ± 2.52 |
24.26 ± 4.64 / 55.26 ± 2.41 |
66.16 ± 0.33 / 18.63 ± 0.56 |
8.27 ± 1.98 / 30.66 ± 1.24 |
2.75 ± 2.26 / 30.86 ± 1.74 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
mistralai/Mistral-7B-Instruct-v0.2 (few-shot) |
7242 |
32 |
32768 |
False |
2,538 ± 415 / 821 ± 253 |
4.58 |
43.11 ± 2.23 / 29.34 ± 3.27 |
3.40 ± 1.87 / 48.75 ± 1.47 |
19.18 ± 3.69 / 49.62 ± 2.59 |
65.01 ± 1.51 / 18.34 ± 1.35 |
7.55 ± 0.67 / 29.89 ± 0.47 |
0.24 ± 0.71 / 38.95 ± 0.84 |
9.2.0 |
9.3.1 |
12.4.0 |
12.4.0 |
9.3.2 |
12.1.0 |
occiglot/occiglot-7b-eu5-instruct (few-shot) |
7242 |
32 |
32768 |
False |
2,088 ± 352 / 706 ± 214 |
4.63 |
40.71 ± 2.93 / 34.57 ± 4.02 |
0.71 ± 2.00 / 36.90 ± 2.10 |
20.66 ± 3.67 / 45.91 ± 3.45 |
65.25 ± 0.97 / 19.09 ± 1.05 |
8.10 ± 0.93 / 29.90 ± 0.88 |
0.35 ± 2.49 / 51.16 ± 2.74 |
12.5.2 |
12.3.1 |
12.4.0 |
12.4.0 |
12.3.1 |
12.3.1 |
mistralai/Mistral-7B-Instruct-v0.1 (few-shot) |
7242 |
32 |
32768 |
False |
5,443 ± 1,273 / 1,144 ± 364 |
4.70 |
36.04 ± 2.59 / 24.74 ± 2.79 |
-0.36 ± 1.36 / 33.94 ± 0.32 |
18.06 ± 3.16 / 42.57 ± 2.89 |
62.80 ± 1.69 / 15.23 ± 1.01 |
7.22 ± 1.33 / 29.40 ± 1.04 |
6.35 ± 2.71 / 50.49 ± 1.57 |
9.3.1 |
9.3.1 |
12.4.0 |
12.4.0 |
9.3.1 |
12.1.0 |
occiglot/occiglot-7b-eu5 (few-shot) |
7242 |
32 |
32768 |
True |
2,219 ± 427 / 717 ± 224 |
4.74 |
40.08 ± 2.82 / 37.15 ± 4.07 |
1.59 ± 1.86 / 39.93 ± 4.19 |
15.98 ± 3.74 / 39.67 ± 3.36 |
62.55 ± 3.03 / 15.26 ± 2.31 |
7.64 ± 0.91 / 29.55 ± 1.22 |
-0.51 ± 1.95 / 47.23 ± 2.39 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.2.0 |
meta-llama/Llama-2-7b-hf (few-shot) |
6738 |
32 |
4096 |
True |
930 ± 310 / 128 ± 43 |
4.83 |
32.71 ± 2.77 / 32.17 ± 2.13 |
0.66 ± 1.75 / 40.36 ± 4.19 |
18.04 ± 4.05 / 41.40 ± 3.27 |
60.73 ± 3.02 / 14.02 ± 1.57 |
5.05 ± 1.56 / 28.95 ± 1.00 |
-0.00 ± 2.41 / 44.93 ± 0.92 |
9.2.0 |
9.2.0 |
12.5.1 |
11.0.0 |
9.2.0 |
12.1.0 |
meta-llama/Llama-2-7b-chat-hf (few-shot) |
6738 |
32 |
4096 |
False |
2,643 ± 455 / 800 ± 247 |
4.84 |
41.10 ± 3.35 / 40.54 ± 3.19 |
-1.07 ± 2.09 / 44.83 ± 2.20 |
16.13 ± 2.52 / 39.51 ± 1.98 |
62.30 ± 0.90 / 13.28 ± 1.36 |
3.27 ± 0.84 / 26.91 ± 0.86 |
1.84 ± 2.19 / 43.79 ± 0.73 |
9.3.1 |
9.3.1 |
12.4.0 |
12.4.0 |
9.3.1 |
12.1.0 |
bineric/NorskGPT-Llama-7B-v0.1 (few-shot) |
6738 |
32 |
4096 |
False |
5,384 ± 879 / 1,746 ± 553 |
4.87 |
34.62 ± 4.64 / 33.25 ± 4.37 |
-0.24 ± 1.43 / 33.75 ± 0.31 |
18.10 ± 1.85 / 43.52 ± 0.87 |
61.81 ± 0.98 / 15.04 ± 0.70 |
6.52 ± 1.10 / 30.08 ± 0.86 |
-1.90 ± 2.28 / 44.34 ± 1.19 |
12.5.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct (few-shot) |
7111 |
64 |
2048 |
True |
2,383 ± 451 / 718 ± 221 |
4.88 |
19.39 ± 1.31 / 19.04 ± 1.36 |
0.01 ± 1.49 / 34.61 ± 0.73 |
20.92 ± 3.41 / 51.75 ± 2.10 |
66.55 ± 0.34 / 18.80 ± 0.51 |
3.75 ± 1.13 / 27.07 ± 0.69 |
-2.37 ± 1.71 / 39.83 ± 3.57 |
12.7.0 |
12.7.0 |
12.4.0 |
12.4.0 |
12.7.0 |
12.7.0 |
LumiOpen/Viking-7B (few-shot) |
7550 |
131 |
4096 |
True |
5,723 ± 1,025 / 1,670 ± 559 |
4.95 |
21.41 ± 5.01 / 19.94 ± 5.01 |
1.76 ± 1.62 / 42.86 ± 2.68 |
22.54 ± 1.74 / 44.93 ± 1.92 |
57.33 ± 2.62 / 10.47 ± 1.06 |
1.01 ± 1.02 / 23.27 ± 1.18 |
-0.36 ± 2.40 / 44.44 ± 1.08 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
norallm/normistral-7b-warm-instruct (few-shot) |
7248 |
33 |
4096 |
True |
6,194 ± 949 / 1,967 ± 619 |
4.96 |
36.59 ± 3.56 / 27.50 ± 2.53 |
0.86 ± 2.41 / 36.44 ± 1.27 |
14.58 ± 2.13 / 37.44 ± 1.86 |
61.99 ± 1.16 / 15.07 ± 0.81 |
1.76 ± 0.67 / 23.24 ± 0.74 |
-0.98 ± 2.63 / 56.13 ± 0.62 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
tollefj/nordavind-7b-instruct-warm (few-shot) |
7248 |
33 |
2048 |
False |
6,450 ± 961 / 2,082 ± 658 |
4.96 |
34.76 ± 4.42 / 23.42 ± 2.33 |
0.77 ± 1.05 / 39.63 ± 2.41 |
12.80 ± 2.37 / 30.77 ± 2.12 |
61.23 ± 1.78 / 15.53 ± 0.95 |
0.93 ± 1.13 / 25.19 ± 0.78 |
-0.76 ± 3.69 / 53.64 ± 2.57 |
12.5.2 |
12.3.2 |
12.4.0 |
12.4.0 |
12.3.2 |
12.3.2 |
LumiOpen/Viking-13B (few-shot) |
14030 |
131 |
4224 |
True |
3,480 ± 727 / 822 ± 274 |
4.98 |
20.51 ± 3.13 / 20.32 ± 2.89 |
1.12 ± 1.87 / 46.10 ± 3.55 |
21.85 ± 3.13 / 45.40 ± 2.16 |
59.41 ± 2.83 / 10.57 ± 1.68 |
-0.48 ± 0.57 / 22.75 ± 0.82 |
0.06 ± 2.69 / 44.46 ± 1.65 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
AI-Sweden-Models/gpt-sw3-1.3b (few-shot) |
1445 |
64 |
2048 |
True |
4,608 ± 988 / 1,115 ± 354 |
5.00 |
1.42 ± 1.60 / 3.11 ± 1.85 |
0.75 ± 0.73 / 45.87 ± 2.20 |
23.33 ± 2.22 / 45.28 ± 1.58 |
64.23 ± 1.78 / 15.08 ± 2.03 |
0.89 ± 1.16 / 24.74 ± 0.76 |
0.68 ± 4.15 / 50.85 ± 2.65 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
Qwen/Qwen1.5-4B-Chat (few-shot) |
3950 |
152 |
32768 |
False |
4,347 ± 893 / 1,135 ± 365 |
5.02 |
25.65 ± 2.99 / 22.30 ± 2.30 |
-0.35 ± 2.01 / 44.36 ± 4.13 |
14.46 ± 2.66 / 32.31 ± 1.66 |
62.11 ± 2.22 / 14.98 ± 1.53 |
6.32 ± 1.09 / 29.87 ± 0.80 |
-1.89 ± 2.66 / 43.72 ± 0.92 |
12.5.2 |
12.1.0 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
01-ai/Yi-6B (few-shot) |
6061 |
64 |
4096 |
True |
2,786 ± 532 / 784 ± 250 |
5.03 |
0.00 ± 0.00 / 0.00 ± 0.00 |
2.12 ± 1.40 / 38.45 ± 2.47 |
16.91 ± 2.57 / 40.63 ± 2.83 |
60.02 ± 3.15 / 14.22 ± 1.52 |
8.44 ± 0.73 / 31.23 ± 0.59 |
0.72 ± 2.33 / 52.54 ± 2.18 |
9.3.2 |
10.0.0 |
12.5.1 |
12.0.0 |
10.0.1 |
12.1.0 |
HPLT/gpt-7b-nordic-prerelease (few-shot) |
7550 |
131 |
4096 |
True |
5,404 ± 931 / 1,638 ± 542 |
5.03 |
27.96 ± 3.08 / 25.78 ± 3.20 |
-0.00 ± 1.28 / 35.53 ± 1.87 |
23.17 ± 2.78 / 44.72 ± 2.82 |
55.57 ± 4.13 / 9.41 ± 1.58 |
-0.57 ± 0.97 / 21.78 ± 0.78 |
-2.72 ± 3.17 / 53.79 ± 1.42 |
12.5.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
AI-Sweden-Models/gpt-sw3-356m-instruct (few-shot) |
471 |
64 |
2048 |
True |
5,855 ± 1,373 / 1,223 ± 391 |
5.04 |
17.79 ± 1.18 / 18.12 ± 1.18 |
0.08 ± 0.15 / 33.73 ± 0.26 |
15.04 ± 2.53 / 34.77 ± 1.72 |
59.45 ± 1.99 / 12.89 ± 1.04 |
0.10 ± 1.39 / 21.91 ± 0.61 |
5.69 ± 2.26 / 56.71 ± 0.87 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
google/gemma-2b (few-shot) |
2506 |
256 |
8192 |
True |
6,087 ± 1,046 / 1,902 ± 563 |
5.05 |
8.83 ± 5.85 / 9.93 ± 4.70 |
0.31 ± 1.95 / 45.42 ± 3.51 |
16.08 ± 2.91 / 37.41 ± 2.44 |
60.00 ± 2.62 / 13.07 ± 1.31 |
4.71 ± 1.02 / 26.81 ± 0.83 |
0.00 ± 2.53 / 56.42 ± 0.98 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
google/gemma-2b-it (few-shot) |
2506 |
256 |
8192 |
False |
6,471 ± 1,142 / 1,961 ± 584 |
5.16 |
20.49 ± 2.30 / 18.33 ± 1.40 |
-0.01 ± 2.13 / 46.02 ± 2.71 |
10.95 ± 2.39 / 37.64 ± 0.75 |
59.16 ± 0.96 / 9.92 ± 1.05 |
2.42 ± 0.70 / 23.15 ± 0.70 |
0.62 ± 1.42 / 56.02 ± 0.95 |
12.5.2 |
12.1.0 |
12.4.0 |
12.4.0 |
12.1.0 |
12.1.0 |
Qwen/Qwen1.5-4B (few-shot) |
3950 |
152 |
32768 |
True |
3,248 ± 739 / 761 ± 252 |
5.17 |
15.66 ± 5.89 / 15.78 ± 3.95 |
-0.55 ± 1.06 / 39.57 ± 3.61 |
14.11 ± 3.08 / 34.56 ± 2.38 |
57.17 ± 3.07 / 11.73 ± 1.00 |
6.15 ± 0.90 / 29.29 ± 0.65 |
-1.71 ± 3.79 / 50.88 ± 1.29 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
timpal0l/Mistral-7B-v0.1-flashback-v2-instruct (few-shot) |
7242 |
32 |
32768 |
False |
5,172 ± 813 / 1,647 ± 518 |
5.18 |
24.98 ± 5.71 / 25.35 ± 4.78 |
1.18 ± 1.09 / 39.01 ± 2.76 |
8.52 ± 2.30 / 21.32 ± 2.25 |
39.94 ± 9.39 / 5.18 ± 1.53 |
5.09 ± 1.11 / 28.86 ± 0.73 |
4.70 ± 2.96 / 56.56 ± 0.97 |
12.5.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
12.3.2 |
Qwen/Qwen1.5-1.8B-Chat (few-shot) |
1837 |
152 |
32768 |
False |
8,304 ± 1,846 / 1,933 ± 617 |
5.21 |
14.15 ± 1.92 / 14.96 ± 2.11 |
0.78 ± 1.70 / 44.74 ± 3.57 |
7.80 ± 1.32 / 23.47 ± 1.64 |
57.27 ± 1.42 / 10.43 ± 0.97 |
3.87 ± 0.97 / 26.29 ± 0.94 |
1.92 ± 2.32 / 50.07 ± 2.68 |
12.5.2 |
12.1.0 |
12.5.0 |
12.5.0 |
12.1.0 |
12.1.0 |
Qwen/Qwen1.5-1.8B (few-shot) |
1837 |
152 |
32768 |
True |
5,666 ± 1,328 / 1,256 ± 408 |
5.25 |
12.26 ± 4.13 / 12.77 ± 3.60 |
0.94 ± 1.34 / 40.66 ± 3.73 |
6.31 ± 1.01 / 20.24 ± 2.02 |
55.32 ± 3.49 / 8.91 ± 1.05 |
3.79 ± 1.12 / 25.76 ± 0.91 |
1.13 ± 3.74 / 52.30 ± 2.26 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
Qwen/Qwen1.5-0.5B-Chat (few-shot) |
620 |
152 |
32768 |
False |
11,740 ± 3,000 / 2,209 ± 721 |
5.32 |
9.50 ± 3.17 / 9.41 ± 3.40 |
1.76 ± 1.62 / 38.51 ± 3.72 |
3.14 ± 0.71 / 17.84 ± 2.26 |
58.92 ± 1.57 / 10.09 ± 1.41 |
2.82 ± 0.73 / 26.56 ± 0.56 |
1.48 ± 3.56 / 53.95 ± 2.45 |
12.5.2 |
12.1.0 |
12.5.0 |
12.5.0 |
12.1.0 |
12.1.0 |
Qwen/Qwen1.5-0.5B (few-shot) |
620 |
152 |
32768 |
True |
11,371 ± 2,924 / 2,122 ± 692 |
5.35 |
16.20 ± 1.52 / 16.96 ± 1.71 |
-0.57 ± 1.20 / 41.25 ± 3.51 |
3.31 ± 0.82 / 16.86 ± 2.98 |
56.00 ± 3.13 / 10.05 ± 0.73 |
2.71 ± 0.84 / 24.35 ± 0.86 |
0.85 ± 1.91 / 52.12 ± 2.92 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
RJuro/kanelsnegl-v0.2 (few-shot) |
7242 |
32 |
512 |
True |
1,373 ± 120 / 709 ± 172 |
5.47 |
23.67 ± 5.16 / 23.19 ± 4.37 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 14.61 ± 2.02 |
50.54 ± 0.14 / 3.11 ± 0.06 |
0.11 ± 0.23 / 21.58 ± 0.59 |
0.00 ± 0.00 / 56.52 ± 0.89 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
12.7.0 |
RuterNorway/Llama-2-7b-chat-norwegian (few-shot) |
unknown |
32 |
4096 |
False |
10,890 ± 2,686 / 2,186 ± 750 |
5.53 |
9.48 ± 1.48 / 10.10 ± 1.44 |
0.07 ± 1.06 / 43.54 ± 3.63 |
1.04 ± 0.96 / 7.35 ± 3.52 |
55.16 ± 1.26 / 10.52 ± 1.13 |
0.74 ± 0.76 / 25.88 ± 1.33 |
-0.16 ± 0.86 / 32.02 ± 2.77 |
9.3.1 |
9.3.1 |
12.5.2 |
12.4.0 |
9.3.1 |
12.1.0 |
allenai/OLMo-7B-Twin-2T (few-shot) |
6888 |
50 |
2176 |
True |
5,484 ± 1,125 / 1,317 ± 425 |
5.58 |
9.04 ± 5.36 / 9.25 ± 4.70 |
-0.08 ± 1.02 / 34.02 ± 0.68 |
8.86 ± 2.70 / 27.28 ± 1.45 |
40.35 ± 0.29 / 6.83 ± 0.14 |
1.17 ± 1.14 / 23.52 ± 1.31 |
-3.46 ± 3.64 / 47.67 ± 2.88 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
allenai/OLMo-7B (few-shot) |
6888 |
50 |
2176 |
True |
5,403 ± 1,133 / 1,294 ± 423 |
5.60 |
8.84 ± 2.72 / 8.59 ± 2.81 |
-0.38 ± 1.52 / 34.73 ± 1.40 |
5.08 ± 1.06 / 18.85 ± 2.62 |
42.35 ± 0.38 / 6.36 ± 0.24 |
2.31 ± 1.36 / 25.90 ± 1.45 |
-1.79 ± 3.44 / 45.33 ± 2.32 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
NorGLM/NorGPT-369M (few-shot) |
unknown |
64 |
1024 |
True |
19,896 ± 5,099 / 3,848 ± 1,251 |
5.76 |
1.68 ± 1.40 / 1.54 ± 1.28 |
-1.38 ± 1.13 / 34.41 ± 2.16 |
0.08 ± 0.09 / 10.05 ± 2.08 |
44.02 ± 1.31 / 6.35 ± 0.43 |
-0.40 ± 0.83 / 23.98 ± 0.87 |
0.28 ± 1.39 / 32.09 ± 2.15 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
Sigurdur/icebreaker (few-shot) |
110 |
32 |
1024 |
False |
48,619 ± 7,681 / 13,831 ± 4,404 |
5.76 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 3.90 ± 0.28 |
44.80 ± 0.65 / 3.34 ± 0.08 |
0.25 ± 0.63 / 21.60 ± 0.59 |
0.38 ± 0.75 / 56.53 ± 0.89 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
allenai/OLMo-1B (few-shot) |
1177 |
50 |
2176 |
True |
8,536 ± 1,926 / 1,940 ± 619 |
5.77 |
13.60 ± 2.38 / 14.36 ± 2.36 |
-1.04 ± 0.90 / 34.46 ± 1.41 |
1.51 ± 0.68 / 13.16 ± 3.12 |
37.41 ± 0.29 / 5.10 ± 0.15 |
-0.09 ± 0.72 / 22.80 ± 0.92 |
-1.03 ± 1.32 / 56.25 ± 0.87 |
12.5.2 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
12.1.0 |
Sigurdur/icechat (few-shot) |
110 |
32 |
1024 |
False |
49,558 ± 7,930 / 13,921 ± 4,425 |
5.80 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 0.64 ± 0.34 |
42.46 ± 0.47 / 3.58 ± 0.45 |
-0.37 ± 0.49 / 21.56 ± 0.58 |
0.00 ± 0.00 / 56.52 ± 0.89 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
Sigurdur/qa-icebreaker (few-shot) |
110 |
32 |
1024 |
False |
44,889 ± 6,944 / 13,506 ± 4,256 |
5.80 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 4.86 ± 0.20 |
42.23 ± 0.58 / 2.83 ± 0.31 |
-0.55 ± 0.47 / 21.55 ± 0.58 |
0.00 ± 0.00 / 56.52 ± 0.89 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
ai-forever/mGPT (few-shot) |
unknown |
100 |
1024 |
True |
13,551 ± 4,259 / 2,563 ± 838 |
6.09 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 0.05 ± 0.03 |
17.11 ± 1.37 / 0.96 ± 0.09 |
0.69 ± 0.98 / 23.34 ± 0.72 |
0.47 ± 4.14 / 46.93 ± 3.13 |
9.3.1 |
11.0.0 |
12.5.1 |
12.0.0 |
11.0.0 |
12.1.0 |
Sigurdur/jonas-hallgrimsson-gpt2 (few-shot) |
125 |
51 |
512 |
False |
32,644 ± 3,887 / 11,289 ± 3,585 |
6.35 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 33.69 ± 0.28 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.00 ± 0.00 / 0.00 ± 0.00 |
0.08 ± 0.23 / 21.58 ± 0.58 |
-0.01 ± 1.21 / 55.08 ± 0.99 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |
12.5.2 |