🌍 Multilingual MMLU Benchmark Leaderboard: This leaderboard is dedicated to evaluating and comparing the multilingual capabilities of large language models across different languages and cultures.

🔬 MMMLU Dataset: The dataset used for evaluation is the OpenAI MMMLU Benchmark, which spans 57 different categories, from elementary-level knowledge up to advanced professional subjects like law, physics, history, and computer science. MMMLU contains 14 languages: AR_XY (Arabic), BN_BD (Bengali), DE_DE (German), ES_LA (Spanish), FR_FR (French), HI_IN (Hindi), ID_ID (Indonesian), IT_IT (Italian), JA_JP (Japanese), KO_KR (Korean), PT_BR (Brazilian Portuguese), SW_KE (Swahili), YO_NG (Yoruba), ZH_CN (Simplified Chinese).

🎯 Our Goal is to raise awareness about the importance of improving the performance of LLMs across various languages, with a particular focus on cultural contexts. We strive to make LLMs more inclusive and effective for users worldwide.

{
  "headers": [
    "T",
    "Model",
    "Average ⬆️",
    "AR",
    "BN",
    "DE",
    "ES",
    "FR",
    "HI",
    "ID",
    "IT",
    "JA",
    "KO",
    "PT",
    "SW",
    "YO",
    "ZH",
    "Type",
    "Architecture",
    "Precision",
    "Hub License",
    "#Params (B)",
    "Hub ❤️",
    "Available on the hub",
    "Model sha"
  ],
  "data": [
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/Anthropic/Claude-3.5-Sonnet\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Anthropic/Claude-3.5-Sonnet</a>",
      77.39, 78.48, 74.63, 81.74, 82.77, 82.37, 75.96, 80.49, 81.66, 79.43, 78.95, 82.73, 71.36, 54.46, 78.41,
      "instruction-tuned", "?", "bfloat16", "Claude-3.5-Sonnet", 0, 10000, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/AIDC/Macro-72B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">AIDC/Macro-72B-Chat</a>",
      76.06, 79.33, 76.56, 80.67, 82.56, 80.67, 76.86, 79.2, 81.58, 79.16, 78.77, 81.74, 63.67, 43.96, 80.07,
      "instruction-tuned", "?", "bfloat16", "AIDC", 72.7, 0, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Llama-3.1-70B-Instruct</a>",
      71.67, 71.08, 66.51, 77, 79.27, 77.92, 72.67, 75.69, 77.83, 73.79, 72.74, 78.89, 63.99, 41.16, 74.79,
      "instruction-tuned", "?", "bfloat16", "llama3.1", 70.6, 673, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/openai/GPT4-0125\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">openai/GPT4-0125</a>",
      70.78, 71.12, 64.81, 75.72, 76.79, 75.82, 70.13, 73.68, 75.84, 71.64, 71.32, 76.17, 68.08, 47.26, 72.5,
      "instruction-tuned", "?", "bfloat16", "openai", 0, 10000, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen2-72B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen/Qwen2-72B-Instruct</a>",
      69.15, 72.02, 68.26, 74.36, 77.01, 75.63, 69.87, 73.12, 75.26, 74.05, 72.35, 76.83, 47.31, 34.64, 77.45,
      "instruction-tuned", "Qwen2ForCausalLM", "bfloat16", "tongyi-qianwen", 72.7, 675, true, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen/Qwen2.5-72B-Instruct</a>",
      69.05, 74.31, 67.15, 72.46, 77.52, 75.98, 69.05, 73.3, 72.54, 74.65, 71.78, 76.85, 48.84, 35.51, 76.71,
      "instruction-tuned", "Qwen2ForCausalLM", "bfloat16", "tongyi-qianwen", 72.7, 452, true, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Meta-Llama-3-70B-Instruct</a>",
      64.3, 60.63, 53.77, 71.42, 74.3, 73.17, 65.02, 70.59, 73.33, 65.55, 64.51, 73.74, 51.06, 33.62, 69.5,
      "instruction-tuned", "?", "bfloat16", "llama3", 70.6, 1430, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/openai/GPT4o-mini\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">openai/GPT4o-mini</a>",
      62.63, 62.65, 59.95, 67.96, 68.22, 67.49, 62.22, 66.14, 68.27, 64.41, 63.55, 68.84, 53.11, 38.04, 65.92,
      "instruction-tuned", "?", "bfloat16", "openai", 0, 10000, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/AIDC/Macro-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">AIDC/Macro-7B-Chat</a>",
      60.05, 60.57, 54.36, 65.92, 67.74, 67.58, 54.34, 62.35, 65.42, 64.19, 62.95, 67.61, 43.93, 37.18, 66.54,
      "instruction-tuned", "?", "bfloat16", "AIDC", 7.62, 0, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-expanse-32b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CohereForAI/aya-expanse-32b</a>",
      58.92, 61.57, 43.9, 64.71, 67.53, 67.46, 58.75, 65.43, 66.46, 64.34, 62.43, 67.19, 38.36, 33.39, 63.41,
      "instruction-tuned", "?", "float16", "cc-by-nc-4.0", 32.3, 165, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen/Qwen2.5-7B-Instruct</a>",
      56.08, 56.4, 45.32, 62.06, 65.62, 64.88, 47.39, 61.66, 65.09, 60.75, 59.31, 64.43, 35.38, 32.32, 64.44,
      "instruction-tuned", "Qwen2ForCausalLM", "bfloat16", "tongyi-qianwen", 7.62, 255, true, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen2-7B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen/Qwen2-7B-Instruct</a>",
      51.95, 50.71, 43.36, 57.14, 60.16, 60.83, 45.12, 54.12, 58.99, 56.55, 53.98, 60.11, 34.35, 30.17, 61.78,
      "instruction-tuned", "Qwen2ForCausalLM", "bfloat16", "tongyi-qianwen", 7.62, 583, true, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-23-35B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CohereForAI/aya-23-35B</a>",
      50.13, 51.84, 32.94, 55.45, 57.99, 58.08, 47.58, 55.5, 57.81, 54.53, 53.7, 58.33, 33.58, 30.4, 54.06,
      "instruction-tuned", "?", "float16", "cc-by-nc-4.0", 35, 264, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Llama-3.1-8B-Instruct</a>",
      50.01, 42.19, 38.8, 55.63, 59.14, 58.94, 45.84, 54.27, 56.27, 52.07, 50.78, 59.02, 40.28, 31.36, 55.52,
      "instruction-tuned", "?", "bfloat16", "llama3.1", 8.03, 3020, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-expanse-8b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CohereForAI/aya-expanse-8b</a>",
      48.2, 48.75, 33.36, 53.91, 56.07, 55.5, 46.2, 53.34, 55.28, 51.49, 50.67, 55.83, 31.96, 29.88, 52.52,
      "instruction-tuned", "?", "float16", "cc-by-nc-4.0", 8.03, 271, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Meta-Llama-3-8B-Instruct</a>",
      46.57, 40.54, 36.43, 53.52, 55.8, 55.79, 41.43, 51, 53.33, 42.31, 46.54, 55.46, 37.5, 30.96, 51.42,
      "instruction-tuned", "?", "bfloat16", "llama3", 8.03, 3600, false, "main"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-23-8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CohereForAI/aya-23-8B</a>",
      40.96, 42.07, 27.43, 43.26, 47.86, 46.87, 38.87, 46.7, 47.07, 44.57, 43.64, 46.86, 26.17, 26.44, 45.7,
      "instruction-tuned", "?", "float16", "cc-by-nc-4.0", 8.03, 391, false, "main"
    ]
  ],
  "metadata": null
}