@meta
  v: 1
  route: /model/llama-3-1-405b
  generated: 2026-06-10T09:09:52.501Z

@intent
  purpose:    Show what hardware can run Llama 3.1 405B.
  audience:   ai-engineer, self-hoster, model-evaluator
  capability: inspect_model_specs, find_compatible_gpus, compare_similar_models, open_huggingface

@state
  slug: llama-3-1-405b
  name: Llama 3.1 405B
  hf_repo: meta-llama/Llama-3.1-405B-Instruct
  params_b: 405
  active_params_b: ~
  family: Meta
  type: dense
  context_k: 128
  fp16_gb: 810
  summary: The big one. 810GB at FP16 puts it in DGX or multi-GPU territory. Q3-Q4 quants fit on 2x H100 NVL or an M3 Ultra with 512GB of unified memory.
  smallest_fitting_gpu_q4: m3-ultra-512
  smallest_fitting_gpu_fp16: ~

@actions
  - id: open_huggingface
    method: GET
    href: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
  - id: view_compatible_gpus
    method: GET
    href: /model/llama-3-1-405b
  - id: compare_similar
    method: GET
    href: /model/mixtral-8x22b
  - id: view_calculator
    method: GET
    href: /#calculator

@context
  > The big one. 810GB at FP16 puts it in DGX or multi-GPU territory. Q3-Q4 quants fit on 2x H100 NVL or an M3 Ultra with 512GB of unified memory.

@nav
  self:      /model/llama-3-1-405b
  parents:   [/]
  peers:     [/model/mixtral-8x22b, /model/deepseek-v3, /model/deepseek-r1, /model/gpt-oss-120b]
  drilldown: /the-math
