diff --git a/leaderboard/index.html b/leaderboard/index.html index 40ded5f..34f6196 100644 --- a/leaderboard/index.html +++ b/leaderboard/index.html @@ -71,9 +71,14 @@

LLM Game Benchmark Leaderboard

- - +


+
+ + + +
+
diff --git a/leaderboard/leaderboard-data-agg-gametype-prompttype-llm2.json b/leaderboard/leaderboard-data-agg-gametype-prompttype-llm2.json new file mode 100644 index 0000000..c303d31 --- /dev/null +++ b/leaderboard/leaderboard-data-agg-gametype-prompttype-llm2.json @@ -0,0 +1,90 @@ +[ + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 185, + "Disqualifications-1st": 30, + "Disqualifications-2nd": 54, + "Draws": 5, + "InvalidMoves-1st": 827, + "InvalidMoves-2nd": 990, + "TotalMoves-1st": 2587, + "TotalMoves-2nd": 2506 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 126, + "Disqualifications-1st": 43, + "Disqualifications-2nd": 56, + "Draws": 8, + "InvalidMoves-1st": 960, + "InvalidMoves-2nd": 1143, + "TotalMoves-1st": 3149, + "TotalMoves-2nd": 3125 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "Wins-1st": 129, + "Disqualifications-1st": 55, + "Disqualifications-2nd": 44, + "Draws": 6, + "InvalidMoves-1st": 896, + "InvalidMoves-2nd": 872, + "TotalMoves-1st": 2700, + "TotalMoves-2nd": 2497 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "Wins-1st": 138, + "Disqualifications-1st": 75, + "Disqualifications-2nd": 33, + "Draws": 9, + "InvalidMoves-1st": 1257, + "InvalidMoves-2nd": 816, + "TotalMoves-1st": 3065, + "TotalMoves-2nd": 2457 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "Wins-1st": 137, + "Disqualifications-1st": 91, + "Disqualifications-2nd": 22, + "Draws": 4, + "InvalidMoves-1st": 1504, + "InvalidMoves-2nd": 598, + "TotalMoves-1st": 3099, + "TotalMoves-2nd": 2023 + }, + { + "LLM1stPlayer": "gpt-4o", + "Wins-1st": 160, + "Disqualifications-1st": 23, + "Disqualifications-2nd": 64, + "Draws": 1, + "InvalidMoves-1st": 858, + "InvalidMoves-2nd": 1172, + "TotalMoves-1st": 2845, + "TotalMoves-2nd": 2934 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 116, + "Disqualifications-1st": 15, + "Disqualifications-2nd": 23, + "Draws": 4, + "InvalidMoves-1st": 497, + "InvalidMoves-2nd": 486, + "TotalMoves-1st": 1816, + "TotalMoves-2nd": 1662 + }, + { + "LLM1stPlayer": "random-play", + "Wins-1st": 53, + "Disqualifications-1st": 44, + "Disqualifications-2nd": 47, + "Draws": 0, + "InvalidMoves-1st": 354, + "InvalidMoves-2nd": 988, + "TotalMoves-1st": 2670, + "TotalMoves-2nd": 3204 + } +] \ No newline at end of file diff --git a/leaderboard/leaderboard-data-agg-gametype-prompttype.json b/leaderboard/leaderboard-data-agg-gametype-prompttype.json new file mode 100644 index 0000000..d2bc654 --- /dev/null +++ b/leaderboard/leaderboard-data-agg-gametype-prompttype.json @@ -0,0 +1,730 @@ +[ + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 30, + "Wins-2nd": 4, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 164, + "InvalidMoves-2nd": 124, + "TotalMoves-1st": 442, + "TotalMoves-2nd": 366 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 25, + "Wins-2nd": 5, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 9, + "Draws": 0, + "InvalidMoves-1st": 120, + "InvalidMoves-2nd": 179, + "TotalMoves-1st": 375, + "TotalMoves-2nd": 400 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 20, + "Wins-2nd": 9, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 14, + "Draws": 0, + "InvalidMoves-1st": 114, + "InvalidMoves-2nd": 221, + "TotalMoves-1st": 383, + "TotalMoves-2nd": 456 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 25, + "Wins-2nd": 5, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 14, + "Draws": 0, + "InvalidMoves-1st": 93, + "InvalidMoves-2nd": 213, + "TotalMoves-1st": 328, + "TotalMoves-2nd": 409 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 31, + "Wins-2nd": 1, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 3, + "Draws": 1, + "InvalidMoves-1st": 187, + "InvalidMoves-2nd": 140, + "TotalMoves-1st": 510, + "TotalMoves-2nd": 428 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 20, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 5, + "Draws": 3, + "InvalidMoves-1st": 12, + "InvalidMoves-2nd": 83, + "TotalMoves-1st": 155, + "TotalMoves-2nd": 198 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "random-play", + "Wins-1st": 34, + "Wins-2nd": 0, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 3, + "Draws": 1, + "InvalidMoves-1st": 137, + "InvalidMoves-2nd": 30, + "TotalMoves-1st": 394, + "TotalMoves-2nd": 249 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 20, + "Wins-2nd": 11, + "Disqualifications-1st": 10, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 193, + "InvalidMoves-2nd": 174, + "TotalMoves-1st": 530, + "TotalMoves-2nd": 485 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 14, + "Wins-2nd": 10, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 12, + "Draws": 0, + "InvalidMoves-1st": 167, + "InvalidMoves-2nd": 205, + "TotalMoves-1st": 479, + "TotalMoves-2nd": 491 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 18, + "Wins-2nd": 8, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 12, + "Draws": 1, + "InvalidMoves-1st": 124, + "InvalidMoves-2nd": 194, + "TotalMoves-1st": 378, + "TotalMoves-2nd": 417 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 13, + "Wins-2nd": 11, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 16, + "Draws": 2, + "InvalidMoves-1st": 113, + "InvalidMoves-2nd": 289, + "TotalMoves-1st": 424, + "TotalMoves-2nd": 569 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 18, + "Wins-2nd": 10, + "Disqualifications-1st": 11, + "Disqualifications-2nd": 3, + "Draws": 3, + "InvalidMoves-1st": 223, + "InvalidMoves-2nd": 129, + "TotalMoves-1st": 606, + "TotalMoves-2nd": 485 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 13, + "Wins-2nd": 12, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 3, + "Draws": 1, + "InvalidMoves-1st": 48, + "InvalidMoves-2nd": 90, + "TotalMoves-1st": 282, + "TotalMoves-2nd": 295 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "random-play", + "Wins-1st": 30, + "Wins-2nd": 5, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 6, + "Draws": 1, + "InvalidMoves-1st": 92, + "InvalidMoves-2nd": 62, + "TotalMoves-1st": 450, + "TotalMoves-2nd": 383 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 20, + "Wins-2nd": 10, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 7, + "Draws": 0, + "InvalidMoves-1st": 158, + "InvalidMoves-2nd": 153, + "TotalMoves-1st": 443, + "TotalMoves-2nd": 411 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 17, + "Wins-2nd": 9, + "Disqualifications-1st": 15, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 204, + "InvalidMoves-2nd": 101, + "TotalMoves-1st": 489, + "TotalMoves-2nd": 365 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 19, + "Wins-2nd": 8, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 10, + "Draws": 0, + "InvalidMoves-1st": 86, + "InvalidMoves-2nd": 182, + "TotalMoves-1st": 309, + "TotalMoves-2nd": 376 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 20, + "Wins-2nd": 9, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 11, + "Draws": 0, + "InvalidMoves-1st": 124, + "InvalidMoves-2nd": 219, + "TotalMoves-1st": 391, + "TotalMoves-2nd": 455 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 13, + "Wins-2nd": 17, + "Disqualifications-1st": 12, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 182, + "InvalidMoves-2nd": 99, + "TotalMoves-1st": 483, + "TotalMoves-2nd": 384 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 10, + "Wins-2nd": 10, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 4, + "Draws": 5, + "InvalidMoves-1st": 24, + "InvalidMoves-2nd": 75, + "TotalMoves-1st": 190, + "TotalMoves-2nd": 222 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "random-play", + "Wins-1st": 30, + "Wins-2nd": 3, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 5, + "Draws": 1, + "InvalidMoves-1st": 118, + "InvalidMoves-2nd": 43, + "TotalMoves-1st": 395, + "TotalMoves-2nd": 284 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 20, + "Wins-2nd": 5, + "Disqualifications-1st": 11, + "Disqualifications-2nd": 5, + "Draws": 4, + "InvalidMoves-1st": 182, + "InvalidMoves-2nd": 138, + "TotalMoves-1st": 465, + "TotalMoves-2nd": 392 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 21, + "Wins-2nd": 8, + "Disqualifications-1st": 12, + "Disqualifications-2nd": 3, + "Draws": 1, + "InvalidMoves-1st": 205, + "InvalidMoves-2nd": 105, + "TotalMoves-1st": 507, + "TotalMoves-2nd": 388 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 22, + "Wins-2nd": 4, + "Disqualifications-1st": 15, + "Disqualifications-2nd": 1, + "Draws": 3, + "InvalidMoves-1st": 202, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 427, + "TotalMoves-2nd": 288 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 16, + "Wins-2nd": 9, + "Disqualifications-1st": 10, + "Disqualifications-2nd": 10, + "Draws": 0, + "InvalidMoves-1st": 212, + "InvalidMoves-2nd": 209, + "TotalMoves-1st": 468, + "TotalMoves-2nd": 439 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 17, + "Wins-2nd": 7, + "Disqualifications-1st": 16, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 240, + "InvalidMoves-2nd": 149, + "TotalMoves-1st": 532, + "TotalMoves-2nd": 426 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 15, + "Wins-2nd": 7, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 53, + "InvalidMoves-2nd": 77, + "TotalMoves-1st": 187, + "TotalMoves-2nd": 192 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "random-play", + "Wins-1st": 27, + "Wins-2nd": 5, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 163, + "InvalidMoves-2nd": 49, + "TotalMoves-1st": 479, + "TotalMoves-2nd": 332 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 16, + "Wins-2nd": 5, + "Disqualifications-1st": 22, + "Disqualifications-2nd": 1, + "Draws": 1, + "InvalidMoves-1st": 313, + "InvalidMoves-2nd": 93, + "TotalMoves-1st": 558, + "TotalMoves-2nd": 320 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 23, + "Wins-2nd": 6, + "Disqualifications-1st": 14, + "Disqualifications-2nd": 0, + "Draws": 2, + "InvalidMoves-1st": 258, + "InvalidMoves-2nd": 46, + "TotalMoves-1st": 518, + "TotalMoves-2nd": 281 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 22, + "Wins-2nd": 8, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 5, + "Draws": 1, + "InvalidMoves-1st": 173, + "InvalidMoves-2nd": 95, + "TotalMoves-1st": 401, + "TotalMoves-2nd": 295 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 16, + "Wins-2nd": 7, + "Disqualifications-1st": 13, + "Disqualifications-2nd": 9, + "Draws": 0, + "InvalidMoves-1st": 245, + "InvalidMoves-2nd": 185, + "TotalMoves-1st": 490, + "TotalMoves-2nd": 398 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 13, + "Wins-2nd": 12, + "Disqualifications-1st": 18, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 271, + "InvalidMoves-2nd": 108, + "TotalMoves-1st": 511, + "TotalMoves-2nd": 333 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 19, + "Wins-2nd": 5, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 89, + "InvalidMoves-2nd": 33, + "TotalMoves-1st": 219, + "TotalMoves-2nd": 144 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "random-play", + "Wins-1st": 28, + "Wins-2nd": 3, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 155, + "InvalidMoves-2nd": 38, + "TotalMoves-1st": 402, + "TotalMoves-2nd": 252 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 16, + "Wins-2nd": 12, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 8, + "Draws": 0, + "InvalidMoves-1st": 192, + "InvalidMoves-2nd": 176, + "TotalMoves-1st": 507, + "TotalMoves-2nd": 467 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 27, + "Wins-2nd": 7, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 7, + "Draws": 0, + "InvalidMoves-1st": 176, + "InvalidMoves-2nd": 149, + "TotalMoves-1st": 537, + "TotalMoves-2nd": 476 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 29, + "Wins-2nd": 8, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 70, + "InvalidMoves-2nd": 154, + "TotalMoves-1st": 325, + "TotalMoves-2nd": 374 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 20, + "Wins-2nd": 3, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 18, + "Draws": 0, + "InvalidMoves-1st": 148, + "InvalidMoves-2nd": 259, + "TotalMoves-1st": 449, + "TotalMoves-2nd": 522 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 11, + "Wins-2nd": 14, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 17, + "Draws": 0, + "InvalidMoves-1st": 153, + "InvalidMoves-2nd": 303, + "TotalMoves-1st": 432, + "TotalMoves-2nd": 554 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 19, + "Wins-2nd": 6, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 27, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 185, + "TotalMoves-2nd": 223 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "random-play", + "Wins-1st": 38, + "Wins-2nd": 2, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 92, + "InvalidMoves-2nd": 42, + "TotalMoves-1st": 410, + "TotalMoves-2nd": 318 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 17, + "Wins-2nd": 6, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 85, + "InvalidMoves-2nd": 72, + "TotalMoves-1st": 273, + "TotalMoves-2nd": 241 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 19, + "Wins-2nd": 9, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 81, + "InvalidMoves-2nd": 29, + "TotalMoves-1st": 315, + "TotalMoves-2nd": 243 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 16, + "Wins-2nd": 8, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 4, + "Draws": 2, + "InvalidMoves-1st": 36, + "InvalidMoves-2nd": 85, + "TotalMoves-1st": 202, + "TotalMoves-2nd": 229 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 10, + "Wins-2nd": 12, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 73, + "InvalidMoves-2nd": 106, + "TotalMoves-1st": 248, + "TotalMoves-2nd": 265 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 16, + "Wins-2nd": 7, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 55, + "InvalidMoves-2nd": 116, + "TotalMoves-1st": 224, + "TotalMoves-2nd": 263 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 18, + "Wins-2nd": 8, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 86, + "InvalidMoves-2nd": 51, + "TotalMoves-1st": 300, + "TotalMoves-2nd": 246 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "random-play", + "Wins-1st": 20, + "Wins-2nd": 2, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 81, + "InvalidMoves-2nd": 27, + "TotalMoves-1st": 254, + "TotalMoves-2nd": 175 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "Wins-1st": 9, + "Wins-2nd": 19, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 9, + "Draws": 0, + "InvalidMoves-1st": 56, + "InvalidMoves-2nd": 161, + "TotalMoves-1st": 390, + "TotalMoves-2nd": 477 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "Wins-1st": 7, + "Wins-2nd": 25, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 91, + "InvalidMoves-2nd": 122, + "TotalMoves-1st": 559, + "TotalMoves-2nd": 579 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-flash", + "Wins-1st": 8, + "Wins-2nd": 23, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 8, + "Draws": 0, + "InvalidMoves-1st": 39, + "InvalidMoves-2nd": 139, + "TotalMoves-1st": 352, + "TotalMoves-2nd": 436 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-pro", + "Wins-1st": 6, + "Wins-2nd": 26, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 42, + "InvalidMoves-2nd": 139, + "TotalMoves-1st": 331, + "TotalMoves-2nd": 416 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4-turbo", + "Wins-1st": 9, + "Wins-2nd": 18, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 12, + "Draws": 0, + "InvalidMoves-1st": 41, + "InvalidMoves-2nd": 203, + "TotalMoves-1st": 282, + "TotalMoves-2nd": 423 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4o", + "Wins-1st": 8, + "Wins-2nd": 27, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 53, + "InvalidMoves-2nd": 132, + "TotalMoves-1st": 435, + "TotalMoves-2nd": 502 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "Wins-1st": 6, + "Wins-2nd": 18, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 32, + "InvalidMoves-2nd": 92, + "TotalMoves-1st": 321, + "TotalMoves-2nd": 371 + } +] \ No newline at end of file diff --git a/leaderboard/leaderboard-data-agg-gametype.json b/leaderboard/leaderboard-data-agg-gametype.json new file mode 100644 index 0000000..8cba8bf --- /dev/null +++ b/leaderboard/leaderboard-data-agg-gametype.json @@ -0,0 +1,2158 @@ +[ + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 12, + "Wins-2nd": 2, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 48, + "InvalidMoves-2nd": 31, + "TotalMoves-1st": 141, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 1, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 102, + "InvalidMoves-2nd": 93, + "TotalMoves-1st": 204, + "TotalMoves-2nd": 185 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 97, + "TotalMoves-2nd": 69 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 8, + "Wins-2nd": 1, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 28, + "InvalidMoves-2nd": 90, + "TotalMoves-1st": 116, + "TotalMoves-2nd": 165 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 4, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 87, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 184, + "TotalMoves-2nd": 180 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 15, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 5, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 75, + "TotalMoves-2nd": 55 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 7, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 39, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 115, + "TotalMoves-2nd": 153 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 2, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 8, + "Draws": 0, + "InvalidMoves-1st": 51, + "InvalidMoves-2nd": 113, + "TotalMoves-1st": 144, + "TotalMoves-2nd": 195 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 24, + "InvalidMoves-2nd": 19, + "TotalMoves-1st": 124, + "TotalMoves-2nd": 108 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 2, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 7, + "Draws": 0, + "InvalidMoves-1st": 37, + "InvalidMoves-2nd": 98, + "TotalMoves-1st": 105, + "TotalMoves-2nd": 154 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 6, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 7, + "Draws": 0, + "InvalidMoves-1st": 36, + "InvalidMoves-2nd": 105, + "TotalMoves-1st": 123, + "TotalMoves-2nd": 179 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 20, + "InvalidMoves-2nd": 10, + "TotalMoves-1st": 100, + "TotalMoves-2nd": 76 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 0, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 3, + "Draws": 1, + "InvalidMoves-1st": 67, + "InvalidMoves-2nd": 69, + "TotalMoves-1st": 175, + "TotalMoves-2nd": 164 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 7, + "Wins-2nd": 1, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 109, + "InvalidMoves-2nd": 66, + "TotalMoves-1st": 228, + "TotalMoves-2nd": 178 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 15, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 11, + "InvalidMoves-2nd": 5, + "TotalMoves-1st": 107, + "TotalMoves-2nd": 86 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 7, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 5, + "Draws": 1, + "InvalidMoves-1st": 11, + "InvalidMoves-2nd": 83, + "TotalMoves-1st": 83, + "TotalMoves-2nd": 142 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 13, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 2, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 72, + "TotalMoves-2nd": 56 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 10, + "Wins-2nd": 0, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 1, + "InvalidMoves-1st": 46, + "InvalidMoves-2nd": 9, + "TotalMoves-1st": 127, + "TotalMoves-2nd": 77 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 10, + "Wins-2nd": 0, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 84, + "InvalidMoves-2nd": 13, + "TotalMoves-1st": 193, + "TotalMoves-2nd": 111 + }, + { + "LLM1stPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 0, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 7, + "InvalidMoves-2nd": 8, + "TotalMoves-1st": 74, + "TotalMoves-2nd": 61 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 7, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 76, + "InvalidMoves-2nd": 92, + "TotalMoves-1st": 214, + "TotalMoves-2nd": 223 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 0, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 116, + "InvalidMoves-2nd": 76, + "TotalMoves-1st": 200, + "TotalMoves-2nd": 152 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 6, + "TotalMoves-1st": 116, + "TotalMoves-2nd": 110 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 3, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 61, + "InvalidMoves-2nd": 99, + "TotalMoves-1st": 156, + "TotalMoves-2nd": 185 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 1, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 104, + "InvalidMoves-2nd": 106, + "TotalMoves-1st": 203, + "TotalMoves-2nd": 197 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 6, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 2, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 120, + "TotalMoves-2nd": 109 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 37, + "InvalidMoves-2nd": 99, + "TotalMoves-1st": 127, + "TotalMoves-2nd": 179 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 0, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 86, + "InvalidMoves-2nd": 91, + "TotalMoves-1st": 160, + "TotalMoves-2nd": 156 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 4, + "TotalMoves-1st": 91, + "TotalMoves-2nd": 82 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 1, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 10, + "Draws": 1, + "InvalidMoves-1st": 32, + "InvalidMoves-2nd": 123, + "TotalMoves-1st": 114, + "TotalMoves-2nd": 193 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 4, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 70, + "InvalidMoves-2nd": 108, + "TotalMoves-1st": 154, + "TotalMoves-2nd": 184 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 1, + "Draws": 1, + "InvalidMoves-1st": 11, + "InvalidMoves-2nd": 58, + "TotalMoves-1st": 156, + "TotalMoves-2nd": 192 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 8, + "Wins-2nd": 6, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 65, + "InvalidMoves-2nd": 63, + "TotalMoves-1st": 195, + "TotalMoves-2nd": 185 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 1, + "Wins-2nd": 1, + "Disqualifications-1st": 10, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 154, + "InvalidMoves-2nd": 58, + "TotalMoves-1st": 290, + "TotalMoves-2nd": 187 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 3, + "InvalidMoves-1st": 4, + "InvalidMoves-2nd": 8, + "TotalMoves-1st": 121, + "TotalMoves-2nd": 113 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 8, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 47, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 159, + "TotalMoves-2nd": 183 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 1, + "TotalMoves-1st": 123, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 10, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 2, + "Draws": 1, + "InvalidMoves-1st": 34, + "InvalidMoves-2nd": 35, + "TotalMoves-1st": 177, + "TotalMoves-2nd": 165 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 11, + "Wins-2nd": 0, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 53, + "InvalidMoves-2nd": 16, + "TotalMoves-1st": 167, + "TotalMoves-2nd": 117 + }, + { + "LLM1stPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 3, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 5, + "InvalidMoves-2nd": 11, + "TotalMoves-1st": 106, + "TotalMoves-2nd": 101 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 2, + "Wins-2nd": 8, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 78, + "InvalidMoves-2nd": 61, + "TotalMoves-1st": 168, + "TotalMoves-2nd": 148 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 5, + "Wins-2nd": 0, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 80, + "InvalidMoves-2nd": 92, + "TotalMoves-1st": 184, + "TotalMoves-2nd": 185 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 13, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 91, + "TotalMoves-2nd": 78 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 4, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 97, + "InvalidMoves-2nd": 15, + "TotalMoves-1st": 177, + "TotalMoves-2nd": 92 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 0, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 107, + "InvalidMoves-2nd": 86, + "TotalMoves-1st": 207, + "TotalMoves-2nd": 178 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 105, + "TotalMoves-2nd": 95 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 8, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 11, + "InvalidMoves-2nd": 91, + "TotalMoves-1st": 85, + "TotalMoves-2nd": 152 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 1, + "Wins-2nd": 1, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 70, + "InvalidMoves-2nd": 90, + "TotalMoves-1st": 142, + "TotalMoves-2nd": 156 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 5, + "InvalidMoves-2nd": 1, + "TotalMoves-1st": 82, + "TotalMoves-2nd": 68 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 5, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 77, + "InvalidMoves-2nd": 71, + "TotalMoves-1st": 146, + "TotalMoves-2nd": 134 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 6, + "Wins-2nd": 0, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 8, + "Draws": 0, + "InvalidMoves-1st": 45, + "InvalidMoves-2nd": 109, + "TotalMoves-1st": 126, + "TotalMoves-2nd": 176 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 2, + "InvalidMoves-2nd": 39, + "TotalMoves-1st": 119, + "TotalMoves-2nd": 145 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 6, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 66, + "InvalidMoves-2nd": 34, + "TotalMoves-1st": 151, + "TotalMoves-2nd": 115 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 2, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 110, + "InvalidMoves-2nd": 64, + "TotalMoves-1st": 212, + "TotalMoves-2nd": 160 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 6, + "Wins-2nd": 9, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 6, + "InvalidMoves-2nd": 1, + "TotalMoves-1st": 120, + "TotalMoves-2nd": 109 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 7, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 24, + "InvalidMoves-2nd": 75, + "TotalMoves-1st": 99, + "TotalMoves-2nd": 143 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 7, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 5, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 91, + "TotalMoves-2nd": 79 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 10, + "Wins-2nd": 1, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 48, + "InvalidMoves-2nd": 10, + "TotalMoves-1st": 122, + "TotalMoves-2nd": 73 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 9, + "Wins-2nd": 0, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 70, + "InvalidMoves-2nd": 19, + "TotalMoves-1st": 188, + "TotalMoves-2nd": 125 + }, + { + "LLM1stPlayer": "gemini-1.5-flash", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 2, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 1, + "Draws": 1, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 14, + "TotalMoves-1st": 85, + "TotalMoves-2nd": 86 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 7, + "Wins-2nd": 2, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 25, + "InvalidMoves-2nd": 74, + "TotalMoves-1st": 94, + "TotalMoves-2nd": 131 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 3, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 109, + "InvalidMoves-2nd": 39, + "TotalMoves-1st": 200, + "TotalMoves-2nd": 126 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 0, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 0, + "Draws": 3, + "InvalidMoves-1st": 48, + "InvalidMoves-2nd": 25, + "TotalMoves-1st": 171, + "TotalMoves-2nd": 135 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 5, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 94, + "InvalidMoves-2nd": 24, + "TotalMoves-1st": 188, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 2, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 109, + "InvalidMoves-2nd": 79, + "TotalMoves-1st": 217, + "TotalMoves-2nd": 188 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 2, + "InvalidMoves-2nd": 2, + "TotalMoves-1st": 102, + "TotalMoves-2nd": 88 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 4, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 1, + "Draws": 3, + "InvalidMoves-1st": 94, + "InvalidMoves-2nd": 51, + "TotalMoves-1st": 178, + "TotalMoves-2nd": 128 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 0, + "Disqualifications-1st": 11, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 108, + "InvalidMoves-2nd": 38, + "TotalMoves-1st": 184, + "TotalMoves-2nd": 110 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 15, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 65, + "TotalMoves-2nd": 50 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 0, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 95, + "InvalidMoves-2nd": 67, + "TotalMoves-1st": 163, + "TotalMoves-2nd": 126 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 5, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 89, + "InvalidMoves-2nd": 88, + "TotalMoves-1st": 183, + "TotalMoves-2nd": 175 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 8, + "Wins-2nd": 4, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 28, + "InvalidMoves-2nd": 54, + "TotalMoves-1st": 122, + "TotalMoves-2nd": 138 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 6, + "Wins-2nd": 5, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 90, + "InvalidMoves-2nd": 28, + "TotalMoves-1st": 174, + "TotalMoves-2nd": 113 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 2, + "Disqualifications-1st": 11, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 122, + "InvalidMoves-2nd": 33, + "TotalMoves-1st": 226, + "TotalMoves-2nd": 135 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 0, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 28, + "InvalidMoves-2nd": 88, + "TotalMoves-1st": 132, + "TotalMoves-2nd": 178 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 1, + "Wins-2nd": 6, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 53, + "InvalidMoves-2nd": 77, + "TotalMoves-1st": 119, + "TotalMoves-2nd": 138 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 68, + "TotalMoves-2nd": 54 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 1, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 69, + "InvalidMoves-2nd": 18, + "TotalMoves-1st": 160, + "TotalMoves-2nd": 98 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 6, + "Wins-2nd": 3, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 90, + "InvalidMoves-2nd": 18, + "TotalMoves-1st": 223, + "TotalMoves-2nd": 143 + }, + { + "LLM1stPlayer": "gemini-1.5-pro", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 12, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 4, + "InvalidMoves-2nd": 13, + "TotalMoves-1st": 96, + "TotalMoves-2nd": 91 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 2, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 120, + "InvalidMoves-2nd": 43, + "TotalMoves-1st": 190, + "TotalMoves-2nd": 109 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 2, + "Disqualifications-1st": 8, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 106, + "InvalidMoves-2nd": 36, + "TotalMoves-1st": 174, + "TotalMoves-2nd": 99 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 8, + "Wins-2nd": 1, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 87, + "InvalidMoves-2nd": 14, + "TotalMoves-1st": 194, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 8, + "Wins-2nd": 1, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 92, + "InvalidMoves-2nd": 12, + "TotalMoves-1st": 162, + "TotalMoves-2nd": 74 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 6, + "Wins-2nd": 2, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 115, + "InvalidMoves-2nd": 31, + "TotalMoves-1st": 194, + "TotalMoves-2nd": 104 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 3, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 2, + "InvalidMoves-1st": 51, + "InvalidMoves-2nd": 3, + "TotalMoves-1st": 162, + "TotalMoves-2nd": 103 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 1, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 52, + "InvalidMoves-2nd": 54, + "TotalMoves-1st": 120, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 7, + "Wins-2nd": 2, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 99, + "InvalidMoves-2nd": 41, + "TotalMoves-1st": 178, + "TotalMoves-2nd": 112 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 22, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 103, + "TotalMoves-2nd": 71 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 1, + "Wins-2nd": 3, + "Disqualifications-1st": 7, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 83, + "InvalidMoves-2nd": 78, + "TotalMoves-1st": 150, + "TotalMoves-2nd": 133 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 5, + "Wins-2nd": 1, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 102, + "InvalidMoves-2nd": 78, + "TotalMoves-1st": 184, + "TotalMoves-2nd": 151 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 3, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 60, + "InvalidMoves-2nd": 29, + "TotalMoves-1st": 156, + "TotalMoves-2nd": 114 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 3, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 113, + "InvalidMoves-2nd": 27, + "TotalMoves-1st": 180, + "TotalMoves-2nd": 91 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 2, + "Disqualifications-1st": 9, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 116, + "InvalidMoves-2nd": 36, + "TotalMoves-1st": 188, + "TotalMoves-2nd": 104 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 6, + "Wins-2nd": 7, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 42, + "InvalidMoves-2nd": 45, + "TotalMoves-1st": 143, + "TotalMoves-2nd": 138 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 5, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 81, + "InvalidMoves-2nd": 33, + "TotalMoves-1st": 140, + "TotalMoves-2nd": 88 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 15, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 8, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 79, + "TotalMoves-2nd": 56 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 11, + "Wins-2nd": 0, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 36, + "InvalidMoves-2nd": 11, + "TotalMoves-1st": 112, + "TotalMoves-2nd": 74 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 7, + "Wins-2nd": 1, + "Disqualifications-1st": 6, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 98, + "InvalidMoves-2nd": 9, + "TotalMoves-1st": 186, + "TotalMoves-2nd": 89 + }, + { + "LLM1stPlayer": "gpt-4-turbo", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 2, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 21, + "InvalidMoves-2nd": 18, + "TotalMoves-1st": 104, + "TotalMoves-2nd": 89 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 3, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 85, + "InvalidMoves-2nd": 73, + "TotalMoves-1st": 191, + "TotalMoves-2nd": 171 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 6, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 46, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 154, + "TotalMoves-2nd": 190 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 3, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 61, + "InvalidMoves-2nd": 14, + "TotalMoves-1st": 162, + "TotalMoves-2nd": 106 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 3, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 63, + "InvalidMoves-2nd": 60, + "TotalMoves-1st": 197, + "TotalMoves-2nd": 183 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 6, + "Wins-2nd": 3, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 70, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 211, + "TotalMoves-2nd": 219 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 12, + "Wins-2nd": 1, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 43, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 129, + "TotalMoves-2nd": 74 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 10, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 19, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 104, + "TotalMoves-2nd": 160 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 9, + "Wins-2nd": 2, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 43, + "InvalidMoves-2nd": 65, + "TotalMoves-1st": 140, + "TotalMoves-2nd": 151 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 8, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 81, + "TotalMoves-2nd": 63 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 6, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 27, + "InvalidMoves-2nd": 105, + "TotalMoves-1st": 114, + "TotalMoves-2nd": 180 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 4, + "Wins-2nd": 0, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 10, + "Draws": 0, + "InvalidMoves-1st": 40, + "InvalidMoves-2nd": 114, + "TotalMoves-1st": 152, + "TotalMoves-2nd": 212 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 0, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 81, + "InvalidMoves-2nd": 40, + "TotalMoves-1st": 183, + "TotalMoves-2nd": 130 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 8, + "Draws": 0, + "InvalidMoves-1st": 41, + "InvalidMoves-2nd": 122, + "TotalMoves-1st": 117, + "TotalMoves-2nd": 186 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 5, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 56, + "InvalidMoves-2nd": 103, + "TotalMoves-1st": 152, + "TotalMoves-2nd": 191 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 5, + "Wins-2nd": 6, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 56, + "InvalidMoves-2nd": 78, + "TotalMoves-1st": 163, + "TotalMoves-2nd": 177 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 5, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 4, + "Draws": 1, + "InvalidMoves-1st": 17, + "InvalidMoves-2nd": 89, + "TotalMoves-1st": 108, + "TotalMoves-2nd": 170 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 10, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 77, + "TotalMoves-2nd": 53 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 12, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 40, + "InvalidMoves-2nd": 17, + "TotalMoves-1st": 156, + "TotalMoves-2nd": 119 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "random-play", + "PromptType": "image", + "Wins-1st": 11, + "Wins-2nd": 1, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 50, + "InvalidMoves-2nd": 16, + "TotalMoves-1st": 176, + "TotalMoves-2nd": 129 + }, + { + "LLM1stPlayer": "gpt-4o", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 15, + "Wins-2nd": 0, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 2, + "InvalidMoves-2nd": 9, + "TotalMoves-1st": 78, + "TotalMoves-2nd": 70 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 5, + "Disqualifications-1st": 5, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 85, + "InvalidMoves-2nd": 72, + "TotalMoves-1st": 185, + "TotalMoves-2nd": 167 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 14, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 88, + "TotalMoves-2nd": 74 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 5, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 80, + "InvalidMoves-2nd": 28, + "TotalMoves-1st": 198, + "TotalMoves-2nd": 137 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 10, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 1, + "TotalMoves-1st": 117, + "TotalMoves-2nd": 106 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 5, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 4, + "Draws": 2, + "InvalidMoves-1st": 36, + "InvalidMoves-2nd": 85, + "TotalMoves-1st": 119, + "TotalMoves-2nd": 157 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 4, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 83, + "TotalMoves-2nd": 72 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 2, + "Wins-2nd": 5, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 72, + "InvalidMoves-2nd": 106, + "TotalMoves-1st": 159, + "TotalMoves-2nd": 185 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 8, + "Wins-2nd": 7, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 0, + "TotalMoves-1st": 89, + "TotalMoves-2nd": 80 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 4, + "Disqualifications-1st": 1, + "Disqualifications-2nd": 6, + "Draws": 0, + "InvalidMoves-1st": 53, + "InvalidMoves-2nd": 101, + "TotalMoves-1st": 133, + "TotalMoves-2nd": 171 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 12, + "Wins-2nd": 3, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 2, + "InvalidMoves-2nd": 15, + "TotalMoves-1st": 91, + "TotalMoves-2nd": 92 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 2, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 1, + "InvalidMoves-1st": 85, + "InvalidMoves-2nd": 46, + "TotalMoves-1st": 202, + "TotalMoves-2nd": 153 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 9, + "Wins-2nd": 6, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 1, + "InvalidMoves-2nd": 5, + "TotalMoves-1st": 98, + "TotalMoves-2nd": 93 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "illustration", + "Wins-1st": 9, + "Wins-2nd": 1, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 81, + "InvalidMoves-2nd": 13, + "TotalMoves-1st": 183, + "TotalMoves-2nd": 104 + }, + { + "LLM1stPlayer": "meta.llama3-70b-instruct-v1:0", + "LLM2ndPlayer": "random-play", + "PromptType": "list", + "Wins-1st": 11, + "Wins-2nd": 1, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 3, + "Draws": 0, + "InvalidMoves-1st": 0, + "InvalidMoves-2nd": 14, + "TotalMoves-1st": 71, + "TotalMoves-2nd": 71 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "illustration", + "Wins-1st": 4, + "Wins-2nd": 5, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 10, + "InvalidMoves-2nd": 74, + "TotalMoves-1st": 114, + "TotalMoves-2nd": 170 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 5, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 24, + "InvalidMoves-2nd": 83, + "TotalMoves-1st": 146, + "TotalMoves-2nd": 198 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "PromptType": "list", + "Wins-1st": 3, + "Wins-2nd": 9, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 22, + "InvalidMoves-2nd": 4, + "TotalMoves-1st": 130, + "TotalMoves-2nd": 109 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 7, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 50, + "InvalidMoves-2nd": 65, + "TotalMoves-1st": 268, + "TotalMoves-2nd": 278 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "image", + "Wins-1st": 1, + "Wins-2nd": 9, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 22, + "InvalidMoves-2nd": 55, + "TotalMoves-1st": 143, + "TotalMoves-2nd": 173 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "anthropic.claude-3-sonnet-20240229-v1:0", + "PromptType": "list", + "Wins-1st": 3, + "Wins-2nd": 9, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 19, + "InvalidMoves-2nd": 2, + "TotalMoves-1st": 148, + "TotalMoves-2nd": 128 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "illustration", + "Wins-1st": 2, + "Wins-2nd": 6, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 5, + "Draws": 0, + "InvalidMoves-1st": 13, + "InvalidMoves-2nd": 79, + "TotalMoves-1st": 112, + "TotalMoves-2nd": 171 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "image", + "Wins-1st": 3, + "Wins-2nd": 8, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 12, + "InvalidMoves-2nd": 52, + "TotalMoves-1st": 111, + "TotalMoves-2nd": 146 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-flash", + "PromptType": "list", + "Wins-1st": 3, + "Wins-2nd": 9, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 8, + "TotalMoves-1st": 129, + "TotalMoves-2nd": 119 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "illustration", + "Wins-1st": 2, + "Wins-2nd": 7, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 76, + "TotalMoves-1st": 88, + "TotalMoves-2nd": 144 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "image", + "Wins-1st": 1, + "Wins-2nd": 10, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 61, + "TotalMoves-1st": 132, + "TotalMoves-2nd": 176 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gemini-1.5-pro", + "PromptType": "list", + "Wins-1st": 3, + "Wins-2nd": 9, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 2, + "TotalMoves-1st": 111, + "TotalMoves-2nd": 96 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 5, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 84, + "TotalMoves-1st": 77, + "TotalMoves-2nd": 140 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 6, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 7, + "Draws": 0, + "InvalidMoves-1st": 10, + "InvalidMoves-2nd": 88, + "TotalMoves-1st": 104, + "TotalMoves-2nd": 173 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4-turbo", + "PromptType": "list", + "Wins-1st": 4, + "Wins-2nd": 7, + "Disqualifications-1st": 3, + "Disqualifications-2nd": 1, + "Draws": 0, + "InvalidMoves-1st": 17, + "InvalidMoves-2nd": 31, + "TotalMoves-1st": 101, + "TotalMoves-2nd": 110 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "illustration", + "Wins-1st": 3, + "Wins-2nd": 10, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 16, + "InvalidMoves-2nd": 67, + "TotalMoves-1st": 170, + "TotalMoves-2nd": 216 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "image", + "Wins-1st": 2, + "Wins-2nd": 7, + "Disqualifications-1st": 4, + "Disqualifications-2nd": 2, + "Draws": 0, + "InvalidMoves-1st": 26, + "InvalidMoves-2nd": 61, + "TotalMoves-1st": 172, + "TotalMoves-2nd": 203 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "gpt-4o", + "PromptType": "list", + "Wins-1st": 3, + "Wins-2nd": 10, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 11, + "InvalidMoves-2nd": 4, + "TotalMoves-1st": 93, + "TotalMoves-2nd": 83 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "illustration", + "Wins-1st": 1, + "Wins-2nd": 8, + "Disqualifications-1st": 2, + "Disqualifications-2nd": 4, + "Draws": 0, + "InvalidMoves-1st": 14, + "InvalidMoves-2nd": 91, + "TotalMoves-1st": 131, + "TotalMoves-2nd": 203 + }, + { + "LLM1stPlayer": "random-play", + "LLM2ndPlayer": "meta.llama3-70b-instruct-v1:0", + "PromptType": "list", + "Wins-1st": 5, + "Wins-2nd": 10, + "Disqualifications-1st": 0, + "Disqualifications-2nd": 0, + "Draws": 0, + "InvalidMoves-1st": 18, + "InvalidMoves-2nd": 1, + "TotalMoves-1st": 190, + "TotalMoves-2nd": 168 + } +] \ No newline at end of file diff --git a/leaderboard/leaderboard-script.js b/leaderboard/leaderboard-script.js index b5f0874..67cab28 100644 --- a/leaderboard/leaderboard-script.js +++ b/leaderboard/leaderboard-script.js @@ -1,13 +1,86 @@ -$(document).ready(function() { - const jsonURL = './leaderboard-data.json'; //'https://raw.githubusercontent.com/jackson-harper/JSONLLM/main/newLeaderboard.json'; +function formatDecimal(value) { + let number = parseFloat(value); + return Number.isInteger(number) ? number : number.toFixed(2); +} - function formatDecimal(value) { - const number = parseFloat(value); - return Number.isInteger(number) ? number : number.toFixed(2); - } +// Sanitize the column names to create valid and consistent IDs +function sanitizeColumnName(name) { + return name.replace(/\s+/g, '').replace(/[()]/g, '').toLowerCase(); +} + +function populateDropdown(columnIndex, listId, table) { + let columnData = table.column(columnIndex).data().unique().sort(); + let list = $(listId); + list.empty(); + // Add Select All option + let selectAllItem = $('
  • ') + .append('') + .append('Select All') + .click(function(event) { + let isChecked = $(this).hasClass('checked'); + event.stopPropagation(); // Prevent dropdown from closing + if (isChecked) { + list.find('.item').removeClass('checked'); + list.find('.checkbox').removeClass('checked'); + } else { + list.find('.item').addClass('checked'); + list.find('.checkbox').addClass('checked'); + } + filterTable(); + }); + list.append(selectAllItem); + + columnData.each(function(value) { + let item = $('
  • ') + .append('') + .append('' + value + '') + .click(function(event) { + event.stopPropagation(); // Prevent dropdown from closing + $(this).toggleClass('checked'); + $(this).find('.checkbox').toggleClass('checked'); + filterTable(table); + }); + list.append(item); + }); +} + +function filterTable(table) { + table.columns().every(function(index) { + let column = this; + // Create the appropriate ID for the dropdown list based on the column header text + // This ensures that the dropdown list ID matches the column header it is filtering + let headerTitle = sanitizeColumnName(column.header().textContent); + let selectedFilters = []; + $(`#${headerTitle}List .checked .item-text`).each(function() { + selectedFilters.push($.fn.dataTable.util.escapeRegex($(this).text())); + }); + let regex = selectedFilters.length ? selectedFilters.join('|') : ''; + console.log(`Filtering column ${index} with regex: ${regex}`); + column.search(regex ? '^(' + regex + ')$' : '', true, false).draw(); + }); +} + +// Function to close all dropdowns +function closeAllDropdowns() { + $('.list-items').hide(); +} + +// Function to empty the table's HTML in preparation for updating the table's data. +function emptyTableHTML() { + // Create a new placeholder column to store each column in the aggregated JSON file. + document.getElementById("mytable").innerHTML = "
  • \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "
    \n"; +} + +function showOriginalTable() { + let jsonURL = './leaderboard-data.json'; //'https://raw.githubusercontent.com/jackson-harper/JSONLLM/main/newLeaderboard.json'; $.getJSON(jsonURL, function(data) { - const formattedData = data.map(item => [ + let formattedData = data.map(item => [ item.LLM1stPlayer, item.LLM2ndPlayer, item.PromptType, @@ -29,7 +102,9 @@ $(document).ready(function() { item.UUID ]); - const table = $('#mytable').DataTable({ + emptyTableHTML(formattedData); + + let table = $('#mytable').DataTable({ data: formattedData, columns: [ { title: "LLM (1st)" }, @@ -38,86 +113,39 @@ $(document).ready(function() { { title: "Prompt Version" }, { title: "Game Type"}, { title: "Win Ratio (1st)" }, - { title: "Win Ratio (2nd)"} , + { title: "Win Ratio (2nd)"}, { title: "Wins (1st)" }, { title: "Wins (2nd)" }, { title: "DQ (1st)" }, { title: "DQ (2nd)" }, { title: "Draws" }, - { title: "Invalid Moves Ratio (1st)" }, - { title: "Invalid Moves Ratio (2nd)" }, + { title: "Invalid Moves (1st)" }, + { title: "Invalid Moves (2nd)" }, { title: "Total Moves (1st)" }, { title: "Total Moves (2nd)" }, { title: "Provider Email" }, { title: "Date-Time" }, { title: "UUID" } ], - // Adjust positioning of dom to move search bar and + // Adjust positioning of dom to move search bar and dom: 'frtlpi', columnDefs: [ - { targets: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,17], className: 'dt-body-right' }, + { targets: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17], className: 'dt-body-right' }, { targets: [0, 1, 2, 3, 4, 16], className: 'dt-body-center' }, { targets: [3, 16, 17, 18], visible: false} ] }); - // Sanitize the column names to create valid and consistent IDs - function sanitizeColumnName(name) { - return name.replace(/\s+/g, '').replace(/[()]/g, '').toLowerCase(); - } - - function populateDropdown(columnIndex, listId) { - const columnData = table.column(columnIndex).data().unique().sort(); - const list = $(listId); - list.empty(); - // Add Select All option - const selectAllItem = $('
  • ') - .append('') - .append('Select All') - .click(function(event) { - const isChecked = $(this).hasClass('checked'); - event.stopPropagation(); // Prevent dropdown from closing - if (isChecked) { - list.find('.item').removeClass('checked'); - list.find('.checkbox').removeClass('checked'); - } else { - list.find('.item').addClass('checked'); - list.find('.checkbox').addClass('checked'); - } - filterTable(); - }); - list.append(selectAllItem); - - columnData.each(function(value) { - const item = $('
  • ') - .append('') - .append('' + value + '') - .click(function(event) { - event.stopPropagation(); // Prevent dropdown from closing - $(this).toggleClass('checked'); - $(this).find('.checkbox').toggleClass('checked'); - filterTable(); - }); - list.append(item); - }); - } - - populateDropdown(0, '#llm1stList'); - populateDropdown(1, '#llm2ndList'); - populateDropdown(2, '#prompttypeList'); - populateDropdown(3, '#promptversionList'); - populateDropdown(4, '#gametypeList'); - - // Function to close all dropdowns - function closeAllDropdowns() { - $('.list-items').hide(); - } - + populateDropdown(0, '#llm1stList', table); + populateDropdown(1, '#llm2ndList', table); + populateDropdown(2, '#prompttypeList', table); + populateDropdown(3, '#promptversionList', table); + populateDropdown(4, '#gametypeList', table); + // Toggle dropdown visibility on select button click - $('.select-btn').click(function(event) { - event.stopPropagation(); - const list = $(this).next('.list-items'); - list.toggle(); + $('.select-btn').click(function(event) { + event.stopPropagation(); + $(this).next('.list-items').toggle(); $('.list-items').not(list).hide(); // Close other dropdowns }); @@ -125,35 +153,23 @@ $(document).ready(function() { $(document).click(function() { closeAllDropdowns(); }); - - function filterTable() { - table.columns().every(function(index) { - const column = this; - // Create the appropriate ID for the dropdown list based on the column header text - // This ensures that the dropdown list ID matches the column header it is filtering - const headerTitle = sanitizeColumnName(column.header().textContent); - const selectedFilters = []; - $(`#${headerTitle}List .checked .item-text`).each(function() { - selectedFilters.push($.fn.dataTable.util.escapeRegex($(this).text())); - }); - const regex = selectedFilters.length ? selectedFilters.join('|') : ''; - console.log(`Filtering column ${index} with regex: ${regex}`); - column.search(regex ? '^(' + regex + ')$' : '', true, false).draw(); - }); - } }).fail(function() { console.error("An error occurred while fetching the JSON data."); }); +} + +$(document).ready(function() { + showOriginalTable(); }); document.getElementById('downloadBtn').addEventListener('click', function() { // URL of the file to be downloaded - const fileUrl = './leaderboard-data.json'; + let fileUrl = './leaderboard-data.json'; // Name of the file to be saved as - const fileName = 'leaderboard-data.json'; + let fileName = 'leaderboard-data.json'; // Create an anchor element - const a = document.createElement('a'); + let a = document.createElement('a'); a.href = fileUrl; a.download = fileName; document.body.appendChild(a); @@ -161,3 +177,280 @@ document.getElementById('downloadBtn').addEventListener('click', function() { document.body.removeChild(a); }); +document.getElementById("aggregate-gametype-prompttype-llm2-btn").addEventListener('click', function() { + $("#mytable").DataTable().destroy(); // Delete the existing table. + + // If the button's text does not start with "De-", or in other words, it doesn't say "De-Aggregate", then aggregate the data. Otherwise, de-aggregate the data. + if (!document.getElementById("aggregate-gametype-prompttype-llm2-btn").innerText.startsWith("De-")) { + // Update button name and disable other aggregation buttons. + document.getElementById("aggregate-gametype-prompttype-llm2-btn").innerText = "De-" + document.getElementById("aggregate-gametype-prompttype-llm2-btn").innerText; + document.getElementById("aggregate-gametype-btn").disabled = true; + document.getElementById("aggregate-gametype-prompttype-btn").disabled = true; + + let jsonURL = './leaderboard-data-agg-gametype-prompttype-llm2.json'; //'https://raw.githubusercontent.com/jackson-harper/JSONLLM/main/newLeaderboard.json'; + + $.getJSON(jsonURL, function(data) { + let formattedData = data.map(item => [ + item.LLM1stPlayer, + item["Wins-1st"], + item["Disqualifications-1st"], + item["Disqualifications-2nd"], + item.Draws, + item["InvalidMoves-1st"], + item["InvalidMoves-2nd"], + item["TotalMoves-1st"], + item["TotalMoves-2nd"], + ]); + + emptyTableHTML(formattedData); + + // Create a new table with the columns available in the aggregated data JSON file. + let table = $('#mytable').DataTable({ + data: formattedData, + columns: [ + { title: "LLM (1st)" }, + { title: "Wins (1st)" }, + { title: "DQ (1st)" }, + { title: "DQ (2nd)" }, + { title: "Draws" }, + { title: "Invalid Moves (1st)" }, + { title: "Invalid Moves (2nd)" }, + { title: "Total Moves (1st)" }, + { title: "Total Moves (2nd)" }, + ], + // Adjust positioning of dom to move search bar and + dom: 'frtlpi', + columnDefs: [ + { targets: [1, 2, 3, 4, 5, 6, 7, 8], className: 'dt-body-right' }, + { targets: [0], className: 'dt-body-center' }, + ] + }); + + populateDropdown(0, '#llm1stList', table); + $('.container').each(function () { + if ($(this).text().includes('LLM (2nd)') || $(this).text().includes('Prompt Type') || $(this).text().includes('Game Type')) { + $(this).hide(); + } + }); + + // Toggle dropdown visibility on select button click + $('.select-btn').click(function(event) { + event.stopPropagation(); + $(this).next('.list-items').toggle(); + $('.list-items').not(list).hide(); // Close other dropdowns + }); + + // Close dropdowns when clicking outside + $(document).click(function() { + closeAllDropdowns(); + }); + }).fail(function() { + console.error("An error occurred while fetching the JSON data."); + }); + } + else { + // If we are de-aggregating the data, update the button's title, re-enable other aggregation buttons, un-hide the applicable select dropdowns, and re-show the original table. + document.getElementById("aggregate-gametype-prompttype-llm2-btn").innerText = document.getElementById("aggregate-gametype-prompttype-llm2-btn").innerText.replace("De-", ""); + + document.getElementById("aggregate-gametype-btn").disabled = false; + document.getElementById("aggregate-gametype-prompttype-btn").disabled = false; + + $('.container').each(function () { + if ($(this).text().includes('LLM (2nd)') || $(this).text().includes('Prompt Type') || $(this).text().includes('Game Type')) { + $(this).show(); + } + }); + + showOriginalTable(); + } +}); + +document.getElementById("aggregate-gametype-prompttype-btn").addEventListener('click', function() { + $("#mytable").DataTable().destroy(); // Delete the existing table. + + // If the button's text does not start with "De-", or in other words, it doesn't say "De-Aggregate", then aggregate the data. Otherwise, de-aggregate the data. + if (!document.getElementById("aggregate-gametype-prompttype-btn").innerText.startsWith("De-")) { + // Update button name and disable other aggregation buttons. + document.getElementById("aggregate-gametype-prompttype-btn").innerText = "De-" + document.getElementById("aggregate-gametype-prompttype-btn").innerText; + document.getElementById("aggregate-gametype-btn").disabled = true; + document.getElementById("aggregate-gametype-prompttype-llm2-btn").disabled = true; + + let jsonURL = './leaderboard-data-agg-gametype-prompttype.json'; //'https://raw.githubusercontent.com/jackson-harper/JSONLLM/main/newLeaderboard.json'; + + $.getJSON(jsonURL, function(data) { + let formattedData = data.map(item => [ + item.LLM1stPlayer, + item.LLM2ndPlayer, + item["Wins-1st"], + item["Wins-2nd"], + item["Disqualifications-1st"], + item["Disqualifications-2nd"], + item.Draws, + item["InvalidMoves-1st"], + item["InvalidMoves-2nd"], + item["TotalMoves-1st"], + item["TotalMoves-2nd"], + ]); + + emptyTableHTML(formattedData); + + // Create a new table with the columns available in the aggregated data JSON file. + let table = $('#mytable').DataTable({ + data: formattedData, + columns: [ + { title: "LLM (1st)" }, + { title: "LLM (2nd)" }, + { title: "Wins (1st)" }, + { title: "Wins (2nd)" }, + { title: "DQ (1st)" }, + { title: "DQ (2nd)" }, + { title: "Draws" }, + { title: "Invalid Moves (1st)" }, + { title: "Invalid Moves (2nd)" }, + { title: "Total Moves (1st)" }, + { title: "Total Moves (2nd)" }, + ], + // Adjust positioning of dom to move search bar and + dom: 'frtlpi', + columnDefs: [ + { targets: [2, 3, 4, 5, 6, 7, 8, 9, 10], className: 'dt-body-right' }, + { targets: [0, 1], className: 'dt-body-center' }, + ] + }); + + populateDropdown(0, '#llm1stList', table); + populateDropdown(1, '#llm2ndList', table); + $('.container').each(function () { + if ($(this).text().includes('Game Type') || $(this).text().includes('Prompt Type')) { + $(this).hide(); + } + }); + + // Toggle dropdown visibility on select button click + $('.select-btn').click(function(event) { + event.stopPropagation(); + $(this).next('.list-items').toggle(); + $('.list-items').not(list).hide(); // Close other dropdowns + }); + + // Close dropdowns when clicking outside + $(document).click(function() { + closeAllDropdowns(); + }); + }).fail(function() { + console.error("An error occurred while fetching the JSON data."); + }); + } + else { + // If we are de-aggregating the data, update the button's title, re-enable other aggregation buttons, un-hide the applicable select dropdowns, and re-show the original table. + document.getElementById("aggregate-gametype-prompttype-btn").innerText = document.getElementById("aggregate-gametype-prompttype-btn").innerText.replace("De-", ""); + + document.getElementById("aggregate-gametype-btn").disabled = false; + document.getElementById("aggregate-gametype-prompttype-llm2-btn").disabled = false; + + $('.container').each(function () { + if ($(this).text().includes('Game Type') || $(this).text().includes('Prompt Type')) { + $(this).show(); + } + }); + + showOriginalTable(); + } +}); + +document.getElementById("aggregate-gametype-btn").addEventListener('click', function() { + $("#mytable").DataTable().destroy(); // Delete the existing table. + + // If the button's text does not start with "De-", or in other words, it doesn't say "De-Aggregate", then aggregate the data. Otherwise, de-aggregate the data. + if (!document.getElementById("aggregate-gametype-btn").innerText.startsWith("De-")) { + // Update button name and disable other aggregation buttons. + document.getElementById("aggregate-gametype-btn").innerText = "De-" + document.getElementById("aggregate-gametype-btn").innerText; + document.getElementById("aggregate-gametype-prompttype-btn").disabled = true; + document.getElementById("aggregate-gametype-prompttype-llm2-btn").disabled = true; + + let jsonURL = './leaderboard-data-agg-gametype.json'; //'https://raw.githubusercontent.com/jackson-harper/JSONLLM/main/newLeaderboard.json'; + + $.getJSON(jsonURL, function(data) { + let formattedData = data.map(item => [ + item.LLM1stPlayer, + item.LLM2ndPlayer, + item.PromptType, + item["Wins-1st"], + item["Wins-2nd"], + item["Disqualifications-1st"], + item["Disqualifications-2nd"], + item.Draws, + item["InvalidMoves-1st"], + item["InvalidMoves-2nd"], + item["TotalMoves-1st"], + item["TotalMoves-2nd"], + ]); + + emptyTableHTML(formattedData); + + // Create a new table with the columns available in the aggregated data JSON file. + let table = $('#mytable').DataTable({ + data: formattedData, + columns: [ + { title: "LLM (1st)" }, + { title: "LLM (2nd)" }, + { title: "Prompt Type" }, + { title: "Wins (1st)" }, + { title: "Wins (2nd)" }, + { title: "DQ (1st)" }, + { title: "DQ (2nd)" }, + { title: "Draws" }, + { title: "Invalid Moves (1st)" }, + { title: "Invalid Moves (2nd)" }, + { title: "Total Moves (1st)" }, + { title: "Total Moves (2nd)" }, + ], + // Adjust positioning of dom to move search bar and + dom: 'frtlpi', + columnDefs: [ + { targets: [3, 4, 5, 6, 7, 8, 9, 10, 11], className: 'dt-body-right' }, + { targets: [0, 1, 2], className: 'dt-body-center' }, + ] + }); + + populateDropdown(0, '#llm1stList', table); + populateDropdown(1, '#llm2ndList', table); + populateDropdown(2, '#prompttypeList', table); + $('.container').each(function () { + if ($(this).text().includes('Game Type')) { + $(this).hide(); + } + }); + + // Toggle dropdown visibility on select button click + $('.select-btn').click(function(event) { + event.stopPropagation(); + $(this).next('.list-items').toggle(); + $('.list-items').not(list).hide(); // Close other dropdowns + }); + + // Close dropdowns when clicking outside + $(document).click(function() { + closeAllDropdowns(); + }); + }).fail(function() { + console.error("An error occurred while fetching the JSON data."); + }); + } + else { + // If we are de-aggregating the data, update the button's title, re-enable other aggregation buttons, un-hide the applicable select dropdowns, and re-show the original table. + document.getElementById("aggregate-gametype-btn").innerText = document.getElementById("aggregate-gametype-btn").innerText.replace("De-", ""); + + document.getElementById("aggregate-gametype-prompttype-btn").disabled = false; + document.getElementById("aggregate-gametype-prompttype-llm2-btn").disabled = false; + + $('.container').each(function () { + if ($(this).text().includes('Game Type')) { + $(this).show(); + } + }); + + showOriginalTable(); + } +}); + diff --git a/leaderboard/leaderboard-styles.css b/leaderboard/leaderboard-styles.css index 411bc0e..f1adc10 100644 --- a/leaderboard/leaderboard-styles.css +++ b/leaderboard/leaderboard-styles.css @@ -127,7 +127,7 @@ h1 { float: left !important; } - h2 { +h2 { margin-left: 10px; font-family: 'Tahoma', sans-serif; font-style: normal; @@ -142,7 +142,7 @@ h3 { font-size:medium; } - .table-container { +.table-container { width: fit-content; /* Adjust the width */ margin: 0; /* Center the container */ box-sizing: border-box; @@ -160,6 +160,18 @@ h3 { font-size: medium; } +#aggregate-btn-container { + display: inline; + padding-top: 20px; + padding-left: 210px; +} + +.aggregate-btn { + max-width: 195px; + display: inline; + margin-bottom: 10px; +} + /* Style the submit button .submit-btn { padding: 10px 20px;