<?xml version="1.0" ?><?xml-stylesheet type="text/xsl" encoding="UTF-8" href="iform.xsl" version="1.0"?><!DOCTYPE instructionsection  PUBLIC '-//ARM//DTD instructionsection //EN'  'iform-p.dtd'><!-- Copyright (c) 2010-2026 Arm Limited or its affiliates. All rights reserved. --><!-- This document is Non-Confidential. This document may only be used and distributed in accordance with the terms of the agreement entered into by Arm and the party that Arm delivered this document to. --><instructionsection id="VMMLA" title="VMMLA -- AArch32" type="instruction">
  <docvars>
    <docvar key="instr-class" value="fpsimd"/>
    <docvar key="mnemonic" value="VMMLA"/>
    <docvar key="simdvectorsize" value="quad"/>
  </docvars>
  <heading>VMMLA</heading>
  <desc>
    <brief>
      <para>BFloat16 floating-point matrix multiply-accumulate</para>
    </brief>
    <authored>
      <para>BFloat16 floating-point matrix multiply-accumulate.
This instruction multiplies the 2x4 matrix of BF16 values
in the first 128-bit source vector by the 4x2 BF16 matrix
in the second 128-bit source vector. The resulting 2x2
single-precision matrix product is then added destructively
to the 2x2 single-precision matrix in the 128-bit destination
vector. This is equivalent to performing a 4-way dot product
per destination element. The instruction does not update the <xref linkend="ARMARM_AArch32.fpscr">FPSCR</xref> exception status.</para>
      <note>
        <para>Arm expects that the VMMLA instruction will deliver a
peak BF16 multiply throughput that is at least as high as
can be achieved using two VDOT instructions, with a goal
that it should have significantly higher throughput.</para>
      </note>
    </authored>
  </desc>
  <alias_list howmany="0"/>
  <classes>
    <classesintro count="2">
      <txt>It has encodings from the following instruction sets:</txt>
      <txt> A32 (</txt>
      <a href="#iclass_a1">A1</a>
      <txt>)</txt>
      <txt> and </txt>
      <txt> T32 (</txt>
      <a href="#iclass_t1">T1</a>
      <txt>)</txt>
      <txt>.</txt>
    </classesintro>
    <iclass name="A1" oneof="2" id="iclass_a1" no_encodings="1" isa="A32">
      <docvars>
        <docvar key="armarmheading" value="A1"/>
        <docvar key="instr-class" value="fpsimd"/>
        <docvar key="isa" value="A32"/>
        <docvar key="mnemonic" value="VMMLA"/>
        <docvar key="simdvectorsize" value="quad"/>
      </docvars>
      <iclassintro count="1"/>
      <arch_variants>
        <arch_variant feature="FEAT_AA32BF16" name="v8Ap6"/>
      </arch_variants>
      <regdiagram form="32" psname="A32.cops_as.advsimdext.simd3reg_sameext.VMMLA_A1_Q" tworows="1">
        <box hibit="31" width="4" settings="4">
          <c>1</c>
          <c>1</c>
          <c>1</c>
          <c>1</c>
        </box>
        <box hibit="27" width="2" settings="2">
          <c>1</c>
          <c>1</c>
        </box>
        <box hibit="25" width="1" settings="1">
          <c>0</c>
        </box>
        <box hibit="24" width="2" name="op1" usename="1" settings="2" psbits="xx">
          <c>0</c>
          <c>0</c>
        </box>
        <box hibit="22" width="1" name="D" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="21" width="2" name="op2" usename="1" settings="2" psbits="xx">
          <c>0</c>
          <c>0</c>
        </box>
        <box hibit="19" width="4" name="Vn" usename="1">
          <c colspan="4"/>
        </box>
        <box hibit="15" width="4" name="Vd" usename="1">
          <c colspan="4"/>
        </box>
        <box hibit="11" width="1" settings="1">
          <c>1</c>
        </box>
        <box hibit="10" name="op3" usename="1" settings="1" psbits="x">
          <c>1</c>
        </box>
        <box hibit="9" width="1" settings="1">
          <c>0</c>
        </box>
        <box hibit="8" name="op4" usename="1" settings="1" psbits="x">
          <c>0</c>
        </box>
        <box hibit="7" width="1" name="N" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="6" name="Q" usename="1" settings="1" psbits="x">
          <c>1</c>
        </box>
        <box hibit="5" width="1" name="M" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="4" name="U" usename="1" settings="1" psbits="x">
          <c>0</c>
        </box>
        <box hibit="3" width="4" name="Vm" usename="1">
          <c colspan="4"/>
        </box>
      </regdiagram>
      <encoding name="VMMLA_A1_Q" oneofinclass="1" oneof="2" label="">
        <docvars>
          <docvar key="armarmheading" value="A1"/>
          <docvar key="instr-class" value="fpsimd"/>
          <docvar key="isa" value="A32"/>
          <docvar key="simdvectorsize" value="quad"/>
          <docvar key="mnemonic" value="VMMLA"/>
        </docvars>
        <asmtemplate><text>VMMLA{</text><a hover="See x[Standard assembler syntax fields](Babbefhf)." link="qw_option">&lt;q&gt;</a><text>}.BF16  </text><a hover="Is the 128-bit name of the SIMD&amp;FP destination register, encoded in the &quot;D:Vd&quot; field as &lt;Qd&gt;*2." link="D_Vd__4">&lt;Qd&gt;</a><text>, </text><a hover="Is the 128-bit name of the first SIMD&amp;FP source register, encoded in the &quot;N:Vn&quot; field as &lt;Qn&gt;*2." link="N_Vn__2">&lt;Qn&gt;</a><text>, </text><a hover="Is the 128-bit name of the second SIMD&amp;FP source register, encoded in the &quot;M:Vm&quot; field as &lt;Qm&gt;*2." link="M_Vm__5">&lt;Qm&gt;</a></asmtemplate>
      </encoding>
      <ps_section howmany="1">
        <ps name="A32.cops_as.advsimdext.simd3reg_sameext.VMMLA_A1_Q" sections="1" secttype="noheading">
          <pstext mayhavelinks="1" section="Decode" rep_section="decode">if !IsFeatureImplemented(FEAT_AA32BF16) then Undefined(); end;
if Vd[0] == '1' || Vn[0] == '1' || Vm[0] == '1' then Undefined(); end;
let d : integer = UInt(D::Vd);
let n : integer = UInt(N::Vn);
let m : integer = UInt(M::Vm);
let regs : integer = 2;</pstext></ps>
      </ps_section>
    </iclass>
    <iclass name="T1" oneof="2" id="iclass_t1" no_encodings="1" isa="T32">
      <docvars>
        <docvar key="armarmheading" value="T1"/>
        <docvar key="instr-class" value="fpsimd"/>
        <docvar key="isa" value="T32"/>
        <docvar key="mnemonic" value="VMMLA"/>
        <docvar key="simdvectorsize" value="quad"/>
      </docvars>
      <iclassintro count="1"/>
      <arch_variants>
        <arch_variant feature="FEAT_AA32BF16" name="v8Ap6"/>
      </arch_variants>
      <regdiagram form="16x2" psname="T32.w.cpaf.advsimdext.simd_3sameext.VMMLA_T1_Q" tworows="1">
        <box hibit="31" width="3" settings="3">
          <c>1</c>
          <c>1</c>
          <c>1</c>
        </box>
        <box hibit="28" width="1" settings="1">
          <c>1</c>
        </box>
        <box hibit="27" width="2" settings="2">
          <c>1</c>
          <c>1</c>
        </box>
        <box hibit="25" width="1" settings="1">
          <c>0</c>
        </box>
        <box hibit="24" width="2" name="op1" usename="1" settings="2" psbits="xx">
          <c>0</c>
          <c>0</c>
        </box>
        <box hibit="22" width="1" name="D" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="21" width="2" name="op2" usename="1" settings="2" psbits="xx">
          <c>0</c>
          <c>0</c>
        </box>
        <box hibit="19" width="4" name="Vn" usename="1">
          <c colspan="4"/>
        </box>
        <box hibit="15" width="4" name="Vd" usename="1">
          <c colspan="4"/>
        </box>
        <box hibit="11" width="1" settings="1">
          <c>1</c>
        </box>
        <box hibit="10" name="op3" usename="1" settings="1" psbits="x">
          <c>1</c>
        </box>
        <box hibit="9" width="1" settings="1">
          <c>0</c>
        </box>
        <box hibit="8" name="op4" usename="1" settings="1" psbits="x">
          <c>0</c>
        </box>
        <box hibit="7" width="1" name="N" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="6" name="Q" usename="1" settings="1" psbits="x">
          <c>1</c>
        </box>
        <box hibit="5" width="1" name="M" usename="1">
          <c colspan="1"/>
        </box>
        <box hibit="4" name="U" usename="1" settings="1" psbits="x">
          <c>0</c>
        </box>
        <box hibit="3" width="4" name="Vm" usename="1">
          <c colspan="4"/>
        </box>
      </regdiagram>
      <encoding name="VMMLA_T1_Q" oneofinclass="1" oneof="2" label="">
        <docvars>
          <docvar key="armarmheading" value="T1"/>
          <docvar key="instr-class" value="fpsimd"/>
          <docvar key="isa" value="T32"/>
          <docvar key="simdvectorsize" value="quad"/>
          <docvar key="mnemonic" value="VMMLA"/>
        </docvars>
        <asmtemplate><text>VMMLA{</text><a hover="See x[Standard assembler syntax fields](Babbefhf)." link="qw_option">&lt;q&gt;</a><text>}.BF16  </text><a hover="Is the 128-bit name of the SIMD&amp;FP destination register, encoded in the &quot;D:Vd&quot; field as &lt;Qd&gt;*2." link="D_Vd__4">&lt;Qd&gt;</a><text>, </text><a hover="Is the 128-bit name of the first SIMD&amp;FP source register, encoded in the &quot;N:Vn&quot; field as &lt;Qn&gt;*2." link="N_Vn__2">&lt;Qn&gt;</a><text>, </text><a hover="Is the 128-bit name of the second SIMD&amp;FP source register, encoded in the &quot;M:Vm&quot; field as &lt;Qm&gt;*2." link="M_Vm__5">&lt;Qm&gt;</a></asmtemplate>
      </encoding>
      <ps_section howmany="1">
        <ps name="T32.w.cpaf.advsimdext.simd_3sameext.VMMLA_T1_Q" sections="1" secttype="noheading">
          <pstext mayhavelinks="1" section="Decode" rep_section="decode">if <a link="func_InITBlock_0" file="shared_pseudocode.xml">InITBlock</a>() then UnpredictableProcedure(); end;
if !IsFeatureImplemented(FEAT_AA32BF16) then Undefined(); end;
if Vd[0] == '1' || Vn[0] == '1' || Vm[0] == '1' then Undefined(); end;
let d : integer = UInt(D::Vd);
let n : integer = UInt(N::Vn);
let m : integer = UInt(M::Vm);
let regs : integer = 2;</pstext></ps>
      </ps_section>
    </iclass>
  </classes>
  <explanations scope="all">
    <explanation enclist="VMMLA_A1_Q, VMMLA_T1_Q" symboldefcount="1">
      <symbol link="qw_option">&lt;q&gt;</symbol>
      <account encodedin="">
        <intro>
          <para>See <xref linkend="Babbefhf">Standard assembler syntax fields</xref>.</para>
        </intro>
      </account>
    </explanation>
    <explanation enclist="VMMLA_A1_Q, VMMLA_T1_Q" symboldefcount="1">
      <symbol link="D_Vd__4">&lt;Qd&gt;</symbol>
      <account encodedin="(D :: Vd)">
        <intro>
          <para>Is the 128-bit name of the SIMD&amp;FP destination register, encoded in the &quot;D:Vd&quot; field as &lt;Qd&gt;*2.</para>
        </intro>
      </account>
    </explanation>
    <explanation enclist="VMMLA_A1_Q, VMMLA_T1_Q" symboldefcount="1">
      <symbol link="N_Vn__2">&lt;Qn&gt;</symbol>
      <account encodedin="(N :: Vn)">
        <intro>
          <para>Is the 128-bit name of the first SIMD&amp;FP source register, encoded in the &quot;N:Vn&quot; field as &lt;Qn&gt;*2.</para>
        </intro>
      </account>
    </explanation>
    <explanation enclist="VMMLA_A1_Q, VMMLA_T1_Q" symboldefcount="1">
      <symbol link="M_Vm__5">&lt;Qm&gt;</symbol>
      <account encodedin="(M :: Vm)">
        <intro>
          <para>Is the 128-bit name of the second SIMD&amp;FP source register, encoded in the &quot;M:Vm&quot; field as &lt;Qm&gt;*2.</para>
        </intro>
      </account>
    </explanation>
  </explanations>
  <ps_section howmany="1">
    <ps name="A32.cops_as.advsimdext.simd3reg_sameext.VMMLA_A1_Q" sections="1" secttype="Operation">
      <pstext mayhavelinks="1" section="Execute" rep_section="execute"><a link="func_CheckAdvSIMDEnabled_0" file="shared_pseudocode.xml">CheckAdvSIMDEnabled</a>();

let operand1 : bits(128) = <a link="accessor_Q_1" file="shared_pseudocode.xml">Q</a>(n&gt;&gt;1);
let operand2 : bits(128) = <a link="accessor_Q_1" file="shared_pseudocode.xml">Q</a>(m&gt;&gt;1);
let acc : bits(128) = <a link="accessor_Q_1" file="shared_pseudocode.xml">Q</a>(d&gt;&gt;1);
let fpcr : FPCR_Type = <a link="func_EffectiveFPCR_0" file="shared_pseudocode.xml">EffectiveFPCR</a>();

<a link="accessor_Q_1" file="shared_pseudocode.xml">Q</a>(d&gt;&gt;1) = <a link="func_BFMatMulAddH_4" file="shared_pseudocode.xml">BFMatMulAddH</a>(acc, operand1, operand2, fpcr);</pstext></ps>
  </ps_section>
  <timestamp>2026-03-26 20:27:25</timestamp>
  <commit_id>2026-03_rel</commit_id>
</instructionsection>