<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://megacpp.com/</loc>
<changefreq>daily</changefreq>
<priority>1</priority>
</url>
<url>
<loc>https://megacpp.com/about/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/blog/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>daily</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://megacpp.com/blog/topics/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/blog/topics/data-pipelines/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/blog/topics/h200/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/blog/topics/moe-systems/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/blog/topics/tpu-xla/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/contact/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/docs/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/docs/slm/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/people/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/products/</loc>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://megacpp.com/products/slm/</loc>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-blackwell-tensor-paths-what-we-actually-proved/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-driver-gates-and-false-capability-signals/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-libcuda-driver-patch-lane-and-why-it-still-is-not-silicon-proof/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-sm100a-cubin-patch-repro/</loc>
<lastmod>2026-04-20T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/author-mamba3-spec/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/clustered-sparse-on-tpu-the-planner-stages/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/converting-parquet-token-shards-into-megatron-indexed-datasets/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dsa-cuda-graph-safety/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dsa-cuda-graph-safety-deep-dive/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dsa-index-cache-patch/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dsa-indexer-memory-fix/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dsa-indexer-memory-fix-deep-dive/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fail-closed-hybrid-pattern-translation/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gateskip-and-flexidepth-after-the-router/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-stack-parity-for-megacpp/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/how-to-express-a-nemotron-style-recipe-as-pure-megatron-cli/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/libtpu-pjrt-jax-ownership-boundaries/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/liger-flce-reduction-none/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba-linear-ce-parity-deep-dive/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-mimo-3d-to-2d-smem-deep-dive/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-psiv-cache-scaffold/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/megatron-binidx-pipeline/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/megatron-flce-on-hopper/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/migration-policy-native-megatron-vs-narrow-custom-seams/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/nam56r-launch-policy/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/nam56r-megatron-translation/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/packed-rows-as-the-real-training-contract/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/protobuf-2gb-limits-and-streaming-checkpoints/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/public-safe-mla-integration-patterns-for-megatron/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/regional-compile-without-losing-the-plot/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/restoration-without-git-history/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/restoring-a-megatron-training-tree-without-git-history/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/shared-mla-adapter-boundaries/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/sparse-mla-dimension-generalization/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/sparse-mla-fp8-dispatch/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tilelang-tma-and-h200-reality/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tilelang-tma-bulk-copy-3d-smem-deep-dive/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-2-13-on-gb10-serving-and-training-stack/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-212-xla-breakage-matrix/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-xla-2-11-tpu-reality/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/vllm-gb10-overlay-and-disabled-paths/</loc>
<lastmod>2026-04-19T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/ablation-after-10k-steps/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/activation-checkpointing-deep/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/activation-checkpointing-policy/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/activation-recompute-boundaries/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/activations-and-how-we-split-them/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/adapter-system-and-lora-stack/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/attention-sinks-and-telemetry-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/attention-validity-and-structure/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/block-sparse-attention-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/checkpoint-format-and-resume/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/clang-semantic-indexing/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/code-deduplication-at-scale/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/comms-cost-and-overlap/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/compile-commands-and-semantic-graphs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/compile-time-vs-runtime-tradeoffs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/context-parallel-and-sequence-parallel/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-corpus-construction-detailed/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-data-enhancements-and-why/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-data-preparation-pipeline-deep/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-data-versioning-and-schema/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-eval-suites-and-verifiers/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpp-tokenizer-deep/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cpu-offload-and-memory-calibration-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/cute-dsl-experiments/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/data-pipeline-story/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/data-poisoning-and-refusal-drills/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/data-shuffling-and-seed-discipline/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dataloader-throughput-and-stalls/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dataset-versions-deep-dive/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dataset-versions-v2-to-v6/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/determinism-and-bit-exact-runs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/distillation-bestofn-and-rl/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/distributed-optimizer-stress/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/doc-masking-and-curriculum/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dualpipe-and-3d-parallelism-on-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/dynamo-and-compile-breakage/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/ep-pp-tp-cp-sp-dp-overview/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/eval/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/eval-harness-plumbing/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/expert-parallel-and-moe-sharding/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/external-library-glitches-we-fixed/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fa4-catalog-on-blackwell/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fire-dash-redo-in-practice/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fire-plasticity-toolkit/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/flash-attention-4-in-practice/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fp8-and-fp8-rollout/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/framework-survey-fsdp-vs-megatron-vs-deepspeed/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fsdp-cuda-and-megatron-ddp/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fsdp2-on-xla-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fsdp2-pain-and-payoff/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fused-mla-on-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/fused-moe-and-deep-ep-on-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gated-deltanet-and-hyper-connections/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gb10-journey/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/geodesic-loss-design-notes/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/gradient-accumulation-and-microbatching/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/graph-recompilation-hell/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/h200-bringup-and-naming/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/h200-memory-geometry/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/how-we-keep-a-patch-lane/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/hybrid-layer-interleaving/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/inference-serving-stack/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/kernel-catalog-and-impact/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/kernels-that-pay-for-themselves/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/kv-cache-and-paged-attention/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/libtpu-and-jax-interaction/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/license-and-corpus-provenance/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/long-context-and-sinks/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/loss-curves-and-divergence-playbook/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/m2rnn-engram-memory/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-fused-trapezoidal-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-hybrid/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-kernel-journey/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mamba3-parallel-performance/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/manual-splits-and-what-they-cost/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/megacpp-model-glossary/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/memory-budget-anatomy/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mhc-fused-on-blackwell/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mla-and-weight-absorption/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/mod-moda-mtp-stack/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-benchmark-receipts/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-debugging-playbook/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-image-and-cold-start/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-multi-gpu-issues-and-fixes/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-training-platform-overview/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/modal-vs-owned-hardware/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/moe-routing-we-actually-shipped/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/multi-gpu-modal-benchmarks/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/muon-optimizer-on-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/nccl-and-collective-hangs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/nvfp4-inference/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/observability-and-slo-dashboards/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/one-morning-of-bugs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/oom-debugging-playbook-h200/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/oom-on-v6e/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/pallas-fa-softcap-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/pallas-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/porting-to-megatron-friction/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/precision-recipe-fp16-bf16-fp8-nvfp4/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/profiler-and-receipts/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/profiler-guided-optimization/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/sequence-context-expert-splits/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/slm-architecture/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/slm-data/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/slm-training/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/sota-ablation-and-comparison/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/specialists/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/speculative-decoding-for-ensembles/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/stp-after-ten-thousand-steps/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/stp-geodesic-regularizer/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/structure-embeddings-and-relation-bias/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/te-replacements-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tensor-parallel-and-sharding/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/throughput-vs-quality-knobs/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tokenized-enriched-pipeline-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tokenizer-v2-v3/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-2-12-journey/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-212-nightly-wheel-matrix/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/torch-xla-pjrt-reality/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/tpu-v6e-host-bringup/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/training-on-h200-eight-gpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/training-speed-anatomy-h200/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/training-speed-by-feature/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/transformer-engine-bridge-on-nvidia/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/triton-kernels-we-maintain/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/unique-additions-and-why/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/upstream-pr-mamba3-and-mla/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/upstream-pr-tilelang-and-megatron/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/upstream-prs-overview/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/v6e-performance-deep-dive/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/verifier-first-cpp-evals/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/what-megatron-can-and-cannot-split/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/why-small-model-fills-huge-gpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/xla-adamw-and-flags-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/xla-spmd-sharding-annotations/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/xla-spmd-tokenizer-and-vocab-on-tpu/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://megacpp.com/blog/xla-vs-cuda-stack-decisions/</loc>
<lastmod>2026-04-18T00:00:00.000Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
</urlset>
