<?xml-stylesheet type="text/xsl" encoding="UTF-8" href="iform.xsl" version="1.0"?>
<!DOCTYPE instructionsection PUBLIC "-//ARM//DTD instructionsection //EN" "iform-p.dtd">
<!-- Copyright (c) 2010-2025 Arm Limited or its affiliates. All rights reserved. -->
<!-- This document is Non-Confidential. This document may only be used and distributed in accordance with the terms of the agreement entered into by Arm and the party that Arm delivered this document to. -->
<instructionsection id="FMMLA_FP8FP16" title="FMMLA (widening, 8-bit floating-point to half-precision) -- A64" type="instruction">
  <!-- Instruction-level classification variables: an A64 Advanced SIMD instruction
       (advsimd-type "simd") whose mnemonic is FMMLA. -->
  <docvars>
    <docvar key="advsimd-type" value="simd"/>
    <docvar key="instr-class" value="advsimd"/>
    <docvar key="isa" value="A64"/>
    <docvar key="mnemonic" value="FMMLA"/>
  </docvars>
  <heading>FMMLA (widening, 8-bit floating-point to half-precision)</heading>
  <desc>
    <brief>
      <para>8-bit floating-point matrix multiply-accumulate to half-precision</para>
    </brief>
    <authored>
      <para>This instruction performs the fused sum-of-products within each group of four adjacent 8-bit
elements while multiplying the 2×4 matrix of 8-bit floating-point values
held in each 64-bit segment of the first source vector by the 4×2 matrix
of 8-bit floating-point values in the corresponding segment of the second
source vector. The half-precision sum-of-products are scaled by
2<sup>-UInt(FPMR.LSCALE[3:0])</sup>, before being destructively added
without intermediate rounding to the 2×2 half-precision matrix in the
destination vector. This is equivalent to accumulating a 4-way dot product
per destination element.</para>
      <para>The 8-bit floating-point encoding format for the elements of the first source
vector is selected by <register_link id="AArch64-fpmr.xml" state="AArch64">FPMR</register_link>.F8S1. The 8-bit floating-point
encoding format for the elements of the second source vector is selected by
<register_link id="AArch64-fpmr.xml" state="AArch64">FPMR</register_link>.F8S2.</para>
    </authored>
  </desc>
  <alias_list howmany="0"/>
  <classes>
    <iclass name="Advanced SIMD" oneof="1" id="iclass_advanced_simd" no_encodings="1" isa="A64">
      <!-- Class-level metadata. The docvars repeat the instruction-level classification
           (A64, advsimd, simd, FMMLA). iclassintro carries count="1"; this class contains
           one encoding element (presumably that is what the count refers to; confirm
           against the DTD). arch_variants names the feature FEAT_F8F16MM, which the
           Decode pseudocode of this class also tests before accepting the instruction. -->
      <docvars>
        <docvar key="advsimd-type" value="simd"/>
        <docvar key="instr-class" value="advsimd"/>
        <docvar key="isa" value="A64"/>
        <docvar key="mnemonic" value="FMMLA"/>
      </docvars>
      <iclassintro count="1"/>
      <arch_variants>
        <arch_variant feature="FEAT_F8F16MM" name="v9Ap6"/>
      </arch_variants>
        <!-- 32-bit encoding layout, most significant bit first:
               [31]    0
               [30]    Q      = 1
               [29]    U      = 1
               [28]    0
               [27:25] 111
               [24]    0
               [23:22] size   = 00
               [21]    0
               [20:16] Rm     (variable)
               [15]    1
               [14:11] opcode = 1101
               [10]    1
               [9:5]   Rn     (variable)
               [4:0]   Rd     (variable)
             The Q and U boxes have no width attribute; each holds a single c cell and the
             neighbouring hibit values leave room for exactly one bit, so they are one bit
             wide. The box widths add up to 32. -->
        <regdiagram form="32" psname="A64.simd_dp.asimdsame2.FMMLA_asimd_FP8FP16" tworows="1">
          <box hibit="31" width="1" settings="1">
            <c>0</c>
          </box>
          <box hibit="30" name="Q" usename="1" settings="1" psbits="x">
            <c>1</c>
          </box>
          <box hibit="29" name="U" usename="1" settings="1" psbits="x">
            <c>1</c>
          </box>
          <box hibit="28" width="1" settings="1">
            <c>0</c>
          </box>
          <box hibit="27" width="3" settings="3">
            <c>1</c>
            <c>1</c>
            <c>1</c>
          </box>
          <box hibit="24" width="1" settings="1">
            <c>0</c>
          </box>
          <box hibit="23" width="2" name="size" usename="1" settings="2" psbits="xx">
            <c>0</c>
            <c>0</c>
          </box>
          <box hibit="21" width="1" settings="1">
            <c>0</c>
          </box>
          <box hibit="20" width="5" name="Rm" usename="1">
            <c colspan="5"/>
          </box>
          <box hibit="15" width="1" settings="1">
            <c>1</c>
          </box>
          <box hibit="14" width="4" name="opcode" usename="1" settings="4" psbits="xxxx">
            <c>1</c>
            <c>1</c>
            <c>0</c>
            <c>1</c>
          </box>
          <box hibit="10" width="1" settings="1">
            <c>1</c>
          </box>
          <box hibit="9" width="5" name="Rn" usename="1">
            <c colspan="5"/>
          </box>
          <box hibit="4" width="5" name="Rd" usename="1">
            <c colspan="5"/>
          </box>
        </regdiagram>
        <!-- The only encoding in this class. Its assembly template reads
             FMMLA Vd.8H, Vn.16B, Vm.16B, where Vd, Vn and Vm are the angle-bracketed
             operand symbols. The asmtemplate is mixed content, so its whitespace (including
             the two spaces after the mnemonic) is part of the data and must stay on one line.
             Each hover string repeats the paragraph of the explanation whose symbol link
             (Vd__3, Vn__2, Vm) matches the link attribute here. -->
        <encoding name="FMMLA_asimd_FP8FP16" oneofinclass="1" oneof="1" label="">
          <docvars>
            <docvar key="instr-class" value="advsimd"/>
            <docvar key="isa" value="A64"/>
            <docvar key="advsimd-type" value="simd"/>
            <docvar key="mnemonic" value="FMMLA"/>
          </docvars>
          <asmtemplate><text>FMMLA  </text><a hover="Is the name of the SIMD&amp;FP third source and destination register, encoded in the &quot;Rd&quot; field." link="Vd__3">&lt;Vd&gt;</a><text>.8H, </text><a hover="Is the name of the first SIMD&amp;FP source register, encoded in the &quot;Rn&quot; field." link="Vn__2">&lt;Vn&gt;</a><text>.16B, </text><a hover="Is the name of the second SIMD&amp;FP source register, encoded in the &quot;Rm&quot; field." link="Vm">&lt;Vm&gt;</a><text>.16B</text></asmtemplate>
        </encoding>
        <!-- Decode pseudocode (ASL). Ends decode with Decode_UNDEF when FEAT_F8F16MM is not
             implemented; otherwise binds n, m and d to the unsigned values of the Rn, Rm and
             Rd encoding fields. The Operation pseudocode uses n, m and d as register numbers.
             The pstext content is literal text, so no comments are placed inside it. -->
        <ps_section howmany="1">
          <ps name="A64.simd_dp.asimdsame2.FMMLA_asimd_FP8FP16" sections="1" secttype="noheading">
            <pstext mayhavelinks="1" section="Decode" rep_section="decode">if !IsFeatureImplemented(FEAT_F8F16MM) then EndOfDecode(Decode_UNDEF); end;
let n : integer = UInt(Rn);
let m : integer = UInt(Rm);
let d : integer = UInt(Rd);</pstext></ps>
        </ps_section>
    </iclass>
  </classes>
  <!-- Assembler symbol definitions for the FMMLA_asimd_FP8FP16 encoding:
         Vd  (link Vd__3)  third source and destination register, encoded in Rd
         Vn  (link Vn__2)  first source register, encoded in Rn
         Vm  (link Vm)     second source register, encoded in Rm
       The link values are the ones referenced by the asmtemplate of the encoding, and each
       paragraph is repeated there as the hover text of the matching operand. -->
  <explanations scope="all">
    <explanation enclist="FMMLA_asimd_FP8FP16" symboldefcount="1">
      <symbol link="Vd__3">&lt;Vd&gt;</symbol>
      <account encodedin="Rd">
        <intro>
          <para>Is the name of the SIMD&amp;FP third source and destination register, encoded in the "Rd" field.</para>
        </intro>
      </account>
    </explanation>
    <explanation enclist="FMMLA_asimd_FP8FP16" symboldefcount="1">
      <symbol link="Vn__2">&lt;Vn&gt;</symbol>
      <account encodedin="Rn">
        <intro>
          <para>Is the name of the first SIMD&amp;FP source register, encoded in the "Rn" field.</para>
        </intro>
      </account>
    </explanation>
    <explanation enclist="FMMLA_asimd_FP8FP16" symboldefcount="1">
      <symbol link="Vm">&lt;Vm&gt;</symbol>
      <account encodedin="Rm">
        <intro>
          <para>Is the name of the second SIMD&amp;FP source register, encoded in the "Rm" field.</para>
        </intro>
      </account>
    </explanation>
  </explanations>
  <!-- Operation pseudocode (ASL), in order:
         1. Calls CheckFPMREnabled() and AArch64_CheckFPAdvSIMDEnabled(). Both are defined
            outside this file (presumably enable/trap checks; confirm in the shared
            pseudocode).
         2. Reads registers n, m and d as 128-bit operands. d is read as well as written
            because the result accumulates into the destination.
         3. For s = 0 and s = 1, takes the slice [s*:64] of each operand (64 bits per
            iteration) and stores FP8MatMulAddFP{64}(acc, op1, op2, 4, FPCR(), FPMR())
            into the same slice of result.
            NOTE(review): the literal 4 looks like the number of 8-bit products summed per
            destination element (the description speaks of a 4-way dot product); confirm
            against the definition of FP8MatMulAddFP.
         4. Writes the 128-bit result back to register d.
       n, m and d are bound by the Decode pseudocode of the iclass. The pstext content is
       literal text, so no comments are placed inside it. -->
  <ps_section howmany="1">
    <ps name="A64.simd_dp.asimdsame2.FMMLA_asimd_FP8FP16" sections="1" secttype="Operation">
      <pstext mayhavelinks="1" section="Execute" rep_section="execute">CheckFPMREnabled();
AArch64_CheckFPAdvSIMDEnabled();
let operand1 : bits(128) = V{}(n);
let operand2 : bits(128) = V{}(m);
let operand3 : bits(128) = V{}(d);

var result : bits(128);
var op1, op2, acc : bits(64);

for s = 0 to 1 do
    op1 = operand1[s*:64];
    op2 = operand2[s*:64];
    acc = operand3[s*:64];
    result[s*:64] = FP8MatMulAddFP{64}(acc, op1, op2, 4, FPCR(), FPMR());
end;

V{128}(d) = result;</pstext></ps>
  </ps_section>
  <timestamp>2026-03-12 12:23:09</timestamp>
  <commit_id>2025-09_rel_asl1</commit_id>
</instructionsection>
