Skip to content

[API Proposal]: Arm64 [Load/Store]Vector64 and [Load/Store]Vector128 for 2,3 and 4 variants #84510

@kunalspathak

Description

@kunalspathak

Background and motivation

These APIs provide a way to load Vector64 and Vector128 values from an address. The x2, x3 and x4 variants provide a way to load 2, 3 and 4 vectors simultaneously.

API Proposal

namespace System.Runtime.Intrinsics.Arm;

public abstract partial class AdvSimd
{
    // LD1 (multiple structures)
    // LoadVector64 already present

    // LD1 (multiple structures) 2 register variant
    // One overload per element type: takes a pointer and returns two Vector64<T>
    // values as the named tuple (Value1, Value2).
    // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
    // without de-interleaving, so the "AndUnzip" suffix may belong on the LD2 form
    // instead — confirm against the Arm ISA reference.
    // NOTE(review): no long/ulong/double overloads are listed for Vector64, while the
    // Vector128 section lists them — confirm this is intentional.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2) LoadVector64x2AndUnzip(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) LoadVector64x2AndUnzip(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2) LoadVector64x2AndUnzip(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2) LoadVector64x2AndUnzip(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2) LoadVector64x2AndUnzip(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2) LoadVector64x2AndUnzip(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2) LoadVector64x2AndUnzip(float*  address);

    // LD1 (multiple structures) 3 register variant
    // One overload per element type: takes a pointer and returns three Vector64<T>
    // values as the named tuple (Value1, Value2, Value3).
    // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
    // without de-interleaving, so the "AndUnzip" suffix may belong on the LD3 form
    // instead — confirm against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) LoadVector64x3AndUnzip(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) LoadVector64x3AndUnzip(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) LoadVector64x3AndUnzip(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) LoadVector64x3AndUnzip(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) LoadVector64x3AndUnzip(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) LoadVector64x3AndUnzip(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) LoadVector64x3AndUnzip(float*  address);
    
    // LD1 (multiple structures) 4 register variant
    // One overload per element type: takes a pointer and returns four Vector64<T>
    // values as the named tuple (Value1, Value2, Value3, Value4).
    // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
    // without de-interleaving, so the "AndUnzip" suffix may belong on the LD4 form
    // instead — confirm against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) LoadVector64x4AndUnzip(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) LoadVector64x4AndUnzip(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) LoadVector64x4AndUnzip(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) LoadVector64x4AndUnzip(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) LoadVector64x4AndUnzip(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) LoadVector64x4AndUnzip(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) LoadVector64x4AndUnzip(float*  address);
    
    // LD1 (single structure)
    // LoadAndInsertScalar already present

    // LD1R
    // LoadAndReplicateToVector64 already present
    
    // LD2 (multiple structures)
    // One overload per element type: takes a pointer and returns two Vector64<T>
    // values as the named tuple (Value1, Value2).
    // NOTE(review): LD2 is presumably the de-interleaving 2-element structure load,
    // yet this name carries no "AndUnzip" suffix while the LD1 variants do — confirm
    // the intended naming against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2) LoadVector64x2(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) LoadVector64x2(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2) LoadVector64x2(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2) LoadVector64x2(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2) LoadVector64x2(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2) LoadVector64x2(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2) LoadVector64x2(float*  address);

    // LD2 (single structure)
    // One overload per element type: takes an existing (Value1, Value2) tuple, a byte
    // `index`, and a pointer, and returns a tuple of the same shape.
    // NOTE(review): presumably `index` selects the lane that is replaced in each
    // vector and must be a constant within the lane count for T — confirm.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2) LoadVectorAndInsertScalar64x2((Vector64<byte>   Value1, Vector64<byte>   Value2) value, byte index, byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) LoadVectorAndInsertScalar64x2((Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) value, byte index, sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2) LoadVectorAndInsertScalar64x2((Vector64<short>  Value1, Vector64<short>  Value2) value, byte index, short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2) LoadVectorAndInsertScalar64x2((Vector64<ushort> Value1, Vector64<ushort> Value2) value, byte index, ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2) LoadVectorAndInsertScalar64x2((Vector64<int>    Value1, Vector64<int>    Value2) value, byte index, int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2) LoadVectorAndInsertScalar64x2((Vector64<uint>   Value1, Vector64<uint>   Value2) value, byte index, uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2) LoadVectorAndInsertScalar64x2((Vector64<float>  Value1, Vector64<float>  Value2) value, byte index, float*  address);

    // LD2R
    // One overload per element type: takes a pointer and returns two Vector64<T>
    // values as the named tuple (Value1, Value2).
    // NOTE(review): going by the name, presumably each loaded element is replicated
    // across all lanes of its result vector — confirm against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2) LoadAndReplicateToVector64x2(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) LoadAndReplicateToVector64x2(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2) LoadAndReplicateToVector64x2(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2) LoadAndReplicateToVector64x2(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2) LoadAndReplicateToVector64x2(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2) LoadAndReplicateToVector64x2(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2) LoadAndReplicateToVector64x2(float*  address);

    // LD3 (multiple structures)
    // One overload per element type: takes a pointer and returns three Vector64<T>
    // values as the named tuple (Value1, Value2, Value3).
    // NOTE(review): LD3 is presumably the de-interleaving 3-element structure load,
    // yet this name carries no "AndUnzip" suffix while the LD1 variants do — confirm
    // the intended naming against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) LoadVector64x3(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) LoadVector64x3(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) LoadVector64x3(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) LoadVector64x3(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) LoadVector64x3(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) LoadVector64x3(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) LoadVector64x3(float*  address);

    // LD3 (single structure)
    // One overload per element type: takes an existing (Value1, Value2, Value3) tuple,
    // a byte `index`, and a pointer, and returns a tuple of the same shape.
    // NOTE(review): presumably `index` selects the lane that is replaced in each
    // vector and must be a constant within the lane count for T — confirm.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) LoadVectorAndInsertScalar64x3((Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) value, byte index, byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) LoadVectorAndInsertScalar64x3((Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) value, byte index, sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) LoadVectorAndInsertScalar64x3((Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) value, byte index, short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) LoadVectorAndInsertScalar64x3((Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) value, byte index, ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) LoadVectorAndInsertScalar64x3((Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) value, byte index, int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) LoadVectorAndInsertScalar64x3((Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) value, byte index, uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) LoadVectorAndInsertScalar64x3((Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) value, byte index, float*  address);

    // LD3R
    // One overload per element type: takes a pointer and returns three Vector64<T>
    // values as the named tuple (Value1, Value2, Value3).
    // NOTE(review): going by the name, presumably each loaded element is replicated
    // across all lanes of its result vector — confirm against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) LoadAndReplicateToVector64x3(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) LoadAndReplicateToVector64x3(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) LoadAndReplicateToVector64x3(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) LoadAndReplicateToVector64x3(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) LoadAndReplicateToVector64x3(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) LoadAndReplicateToVector64x3(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) LoadAndReplicateToVector64x3(float*  address);

    // LD4 (multiple structures)
    // One overload per element type: takes a pointer and returns four Vector64<T>
    // values as the named tuple (Value1, Value2, Value3, Value4).
    // NOTE(review): LD4 is presumably the de-interleaving 4-element structure load,
    // yet this name carries no "AndUnzip" suffix while the LD1 variants do — confirm
    // the intended naming against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) LoadVector64x4(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) LoadVector64x4(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) LoadVector64x4(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) LoadVector64x4(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) LoadVector64x4(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) LoadVector64x4(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) LoadVector64x4(float*  address);

    // LD4 (single structure)
    // One overload per element type: takes an existing (Value1, Value2, Value3, Value4)
    // tuple, a byte `index`, and a pointer, and returns a tuple of the same shape.
    // NOTE(review): presumably `index` selects the lane that is replaced in each
    // vector and must be a constant within the lane count for T — confirm.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) LoadVectorAndInsertScalar64x4((Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) value, byte index, byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) LoadVectorAndInsertScalar64x4((Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) value, byte index, sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) LoadVectorAndInsertScalar64x4((Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) value, byte index, short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) LoadVectorAndInsertScalar64x4((Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) value, byte index, ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) LoadVectorAndInsertScalar64x4((Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) value, byte index, int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) LoadVectorAndInsertScalar64x4((Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) value, byte index, uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) LoadVectorAndInsertScalar64x4((Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) value, byte index, float*  address);

    // LD4R
    // One overload per element type: takes a pointer and returns four Vector64<T>
    // values as the named tuple (Value1, Value2, Value3, Value4).
    // NOTE(review): going by the name, presumably each loaded element is replicated
    // across all lanes of its result vector — confirm against the Arm ISA reference.
    public static unsafe (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) LoadAndReplicateToVector64x4(byte*   address);
    public static unsafe (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) LoadAndReplicateToVector64x4(sbyte*  address);
    public static unsafe (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) LoadAndReplicateToVector64x4(short*  address);
    public static unsafe (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) LoadAndReplicateToVector64x4(ushort* address);
    public static unsafe (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) LoadAndReplicateToVector64x4(int*    address);
    public static unsafe (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) LoadAndReplicateToVector64x4(uint*   address);
    public static unsafe (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) LoadAndReplicateToVector64x4(float*  address);

    // ST1 (multiple structures)
    // StoreVector already present

    // ST1 (multiple structures) 2 register variant
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2) tuple of Vector64<T>; returns nothing.
    // NOTE(review): "Unzip" normally describes de-interleaving on load; ST1 (multiple
    // structures) appears to write registers consecutively without interleaving, so
    // the "AndUnzip" suffix looks out of place on a store — confirm the naming.
    public static unsafe void StoreVector64x2AndUnzip(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2) value);
    public static unsafe void StoreVector64x2AndUnzip(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2) value);

    // ST1 (multiple structures) 3 register variant
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2, Value3) tuple of Vector64<T>; returns nothing.
    // NOTE(review): "Unzip" normally describes de-interleaving on load; ST1 (multiple
    // structures) appears to write registers consecutively without interleaving, so
    // the "AndUnzip" suffix looks out of place on a store — confirm the naming.
    public static unsafe void StoreVector64x3AndUnzip(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) value);
    public static unsafe void StoreVector64x3AndUnzip(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) value);
    
    // ST1 (multiple structures) 4 register variant
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2, Value3, Value4) tuple of Vector64<T>; returns nothing.
    // NOTE(review): "Unzip" normally describes de-interleaving on load; ST1 (multiple
    // structures) appears to write registers consecutively without interleaving, so
    // the "AndUnzip" suffix looks out of place on a store — confirm the naming.
    public static unsafe void StoreVector64x4AndUnzip(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) value);
    public static unsafe void StoreVector64x4AndUnzip(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) value);

    // ST1 (single structure)
    // StoreSelectedScalar already present
    
    // ST2 (multiple structures)
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2) tuple of Vector64<T>; returns nothing.
    // NOTE(review): ST2 presumably interleaves the two vectors into 2-element
    // structures in memory, which this name does not convey — confirm against the
    // Arm ISA reference.
    public static unsafe void StoreVector64x2(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2) value);
    public static unsafe void StoreVector64x2(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) value);
    public static unsafe void StoreVector64x2(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2) value);
    public static unsafe void StoreVector64x2(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2) value);
    public static unsafe void StoreVector64x2(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2) value);
    public static unsafe void StoreVector64x2(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2) value);
    public static unsafe void StoreVector64x2(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2) value);

    // ST2 (single structure)
    // One overload per element type: takes a destination pointer, a (Value1, Value2)
    // tuple of Vector64<T>, and a byte `index`; returns nothing.
    // NOTE(review): presumably `index` selects the lane taken from each vector and
    // must be a constant within the lane count for T — confirm.
    public static unsafe void StoreSelectedScalar64x2(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2) value, byte index);
    public static unsafe void StoreSelectedScalar64x2(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2) value, byte index);

    // ST3 (multiple structures)
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2, Value3) tuple of Vector64<T>; returns nothing.
    // NOTE(review): ST3 presumably interleaves the three vectors into 3-element
    // structures in memory, which this name does not convey — confirm against the
    // Arm ISA reference.
    public static unsafe void StoreVector64x3(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) value);
    public static unsafe void StoreVector64x3(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) value);
    public static unsafe void StoreVector64x3(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) value);
    public static unsafe void StoreVector64x3(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) value);
    public static unsafe void StoreVector64x3(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) value);
    public static unsafe void StoreVector64x3(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) value);
    public static unsafe void StoreVector64x3(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) value);

    // ST3 (single structure)
    // One overload per element type: takes a destination pointer, a
    // (Value1, Value2, Value3) tuple of Vector64<T>, and a byte `index`; returns nothing.
    // NOTE(review): presumably `index` selects the lane taken from each vector and
    // must be a constant within the lane count for T — confirm.
    public static unsafe void StoreSelectedScalar64x3(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3) value, byte index);
    public static unsafe void StoreSelectedScalar64x3(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3) value, byte index);

    // ST4 (multiple structures)
    // One overload per element type: takes a destination pointer and a
    // (Value1, Value2, Value3, Value4) tuple of Vector64<T>; returns nothing.
    // NOTE(review): ST4 presumably interleaves the four vectors into 4-element
    // structures in memory, which this name does not convey — confirm against the
    // Arm ISA reference.
    public static unsafe void StoreVector64x4(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) value);
    public static unsafe void StoreVector64x4(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) value);
    public static unsafe void StoreVector64x4(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) value);
    public static unsafe void StoreVector64x4(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) value);
    public static unsafe void StoreVector64x4(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) value);
    public static unsafe void StoreVector64x4(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) value);
    public static unsafe void StoreVector64x4(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) value);

    // ST4 (single structure)
    // One overload per element type: takes a destination pointer, a
    // (Value1, Value2, Value3, Value4) tuple of Vector64<T>, and a byte `index`;
    // returns nothing.
    // NOTE(review): presumably `index` selects the lane taken from each vector and
    // must be a constant within the lane count for T — confirm.
    public static unsafe void StoreSelectedScalar64x4(byte*   address, (Vector64<byte>   Value1, Vector64<byte>   Value2, Vector64<byte>   Value3, Vector64<byte>   Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(sbyte*  address, (Vector64<sbyte>  Value1, Vector64<sbyte>  Value2, Vector64<sbyte>  Value3, Vector64<sbyte>  Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(short*  address, (Vector64<short>  Value1, Vector64<short>  Value2, Vector64<short>  Value3, Vector64<short>  Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(ushort* address, (Vector64<ushort> Value1, Vector64<ushort> Value2, Vector64<ushort> Value3, Vector64<ushort> Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(int*    address, (Vector64<int>    Value1, Vector64<int>    Value2, Vector64<int>    Value3, Vector64<int>    Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(uint*   address, (Vector64<uint>   Value1, Vector64<uint>   Value2, Vector64<uint>   Value3, Vector64<uint>   Value4) value, byte index);
    public static unsafe void StoreSelectedScalar64x4(float*  address, (Vector64<float>  Value1, Vector64<float>  Value2, Vector64<float>  Value3, Vector64<float>  Value4) value, byte index);

    public partial class Arm64
    {
        // LD1 (multiple structures)
        // LoadVector128 already present

        // LD1 (multiple structures) 2 register variant
        // One overload per element type (including long, ulong and double): takes a
        // pointer and returns two Vector128<T> values as the named tuple (Value1, Value2).
        // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
        // without de-interleaving, so the "AndUnzip" suffix may belong on the LD2 form
        // instead — confirm against the Arm ISA reference.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2) LoadVector128x2AndUnzip(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) LoadVector128x2AndUnzip(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2) LoadVector128x2AndUnzip(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2) LoadVector128x2AndUnzip(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2) LoadVector128x2AndUnzip(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2) LoadVector128x2AndUnzip(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2) LoadVector128x2AndUnzip(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2) LoadVector128x2AndUnzip(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2) LoadVector128x2AndUnzip(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2) LoadVector128x2AndUnzip(double* address);

        // LD1 (multiple structures) 3 register variant
        // One overload per element type (including long, ulong and double): takes a
        // pointer and returns three Vector128<T> values as the named tuple
        // (Value1, Value2, Value3).
        // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
        // without de-interleaving, so the "AndUnzip" suffix may belong on the LD3 form
        // instead — confirm against the Arm ISA reference.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) LoadVector128x3AndUnzip(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) LoadVector128x3AndUnzip(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) LoadVector128x3AndUnzip(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) LoadVector128x3AndUnzip(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) LoadVector128x3AndUnzip(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) LoadVector128x3AndUnzip(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) LoadVector128x3AndUnzip(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) LoadVector128x3AndUnzip(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) LoadVector128x3AndUnzip(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) LoadVector128x3AndUnzip(double* address);
        
        // LD1 (multiple structures) 4 register variant
        // One overload per element type (including long, ulong and double): takes a
        // pointer and returns four Vector128<T> values as the named tuple
        // (Value1, Value2, Value3, Value4).
        // NOTE(review): LD1 (multiple structures) appears to fill consecutive registers
        // without de-interleaving, so the "AndUnzip" suffix may belong on the LD4 form
        // instead — confirm against the Arm ISA reference.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) LoadVector128x4AndUnzip(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) LoadVector128x4AndUnzip(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) LoadVector128x4AndUnzip(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) LoadVector128x4AndUnzip(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) LoadVector128x4AndUnzip(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) LoadVector128x4AndUnzip(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) LoadVector128x4AndUnzip(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) LoadVector128x4AndUnzip(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) LoadVector128x4AndUnzip(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4AndUnzip(double* address);

        // LD1 (single structure)
        // LoadAndInsertScalar already present

        // LD1R
        // LoadAndReplicateToVector128 already present
        
        // LD2 (multiple structures)
        // One overload per element type (including long, ulong and double): takes a
        // pointer and returns two Vector128<T> values as the named tuple (Value1, Value2).
        // NOTE(review): LD2 is presumably the de-interleaving 2-element structure load,
        // yet this name carries no "AndUnzip" suffix while the LD1 variants do — confirm
        // the intended naming against the Arm ISA reference.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2) LoadVector128x2(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) LoadVector128x2(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2) LoadVector128x2(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2) LoadVector128x2(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2) LoadVector128x2(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2) LoadVector128x2(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2) LoadVector128x2(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2) LoadVector128x2(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2) LoadVector128x2(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2) LoadVector128x2(double* address);

        // LD2 (single structure)
        // One overload per element type (including long, ulong and double): takes an
        // existing (Value1, Value2) tuple, a byte `index`, and a pointer, and returns a
        // tuple of the same shape.
        // NOTE(review): presumably `index` selects the lane that is replaced in each
        // vector and must be a constant within the lane count for T — confirm.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2) LoadVectorAndInsertScalar128x2((Vector128<byte>   Value1, Vector128<byte>   Value2) value, byte index, byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) LoadVectorAndInsertScalar128x2((Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) value, byte index, sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2) LoadVectorAndInsertScalar128x2((Vector128<short>  Value1, Vector128<short>  Value2) value, byte index, short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2) LoadVectorAndInsertScalar128x2((Vector128<ushort> Value1, Vector128<ushort> Value2) value, byte index, ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2) LoadVectorAndInsertScalar128x2((Vector128<int>    Value1, Vector128<int>    Value2) value, byte index, int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2) LoadVectorAndInsertScalar128x2((Vector128<uint>   Value1, Vector128<uint>   Value2) value, byte index, uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2) LoadVectorAndInsertScalar128x2((Vector128<long>   Value1, Vector128<long>   Value2) value, byte index, long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2) LoadVectorAndInsertScalar128x2((Vector128<ulong>  Value1, Vector128<ulong>  Value2) value, byte index, ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2) LoadVectorAndInsertScalar128x2((Vector128<float>  Value1, Vector128<float>  Value2) value, byte index, float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2) LoadVectorAndInsertScalar128x2((Vector128<double> Value1, Vector128<double> Value2) value, byte index, double* address);

        // LD2R
        // Proposed LoadAndReplicateToVector128x2 overloads: each takes an element pointer and
        // returns a tuple of two Vector128<T> values. Two-register counterpart of the
        // existing LoadAndReplicateToVector128 (LD1R). One overload per element type.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2) LoadAndReplicateToVector128x2(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) LoadAndReplicateToVector128x2(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2) LoadAndReplicateToVector128x2(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2) LoadAndReplicateToVector128x2(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2) LoadAndReplicateToVector128x2(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2) LoadAndReplicateToVector128x2(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2) LoadAndReplicateToVector128x2(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2) LoadAndReplicateToVector128x2(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2) LoadAndReplicateToVector128x2(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2) LoadAndReplicateToVector128x2(double* address);

        // LD3 (multiple structures)
        // Proposed LoadVector128x3 overloads: each takes an element pointer and returns a
        // tuple of three Vector128<T> values (Value1, Value2, Value3). One overload per
        // element type: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) LoadVector128x3(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) LoadVector128x3(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) LoadVector128x3(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) LoadVector128x3(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) LoadVector128x3(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) LoadVector128x3(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) LoadVector128x3(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) LoadVector128x3(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) LoadVector128x3(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) LoadVector128x3(double* address);

        // LD3 (single structure)
        // Proposed LoadVectorAndInsertScalar128x3 overloads: each takes an existing triple of
        // vectors (value), a byte index and an element pointer, and returns a triple of
        // Vector128<T>. One overload per element type (byte through double).
        // NOTE(review): presumably `index` selects the lane to overwrite and must be a
        // constant within the lane count of T - confirm.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) LoadVectorAndInsertScalar128x3((Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) value, byte index, byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) LoadVectorAndInsertScalar128x3((Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) value, byte index, sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) LoadVectorAndInsertScalar128x3((Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) value, byte index, short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) LoadVectorAndInsertScalar128x3((Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) value, byte index, ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) LoadVectorAndInsertScalar128x3((Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) value, byte index, int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) LoadVectorAndInsertScalar128x3((Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) value, byte index, uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) LoadVectorAndInsertScalar128x3((Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) value, byte index, long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) LoadVectorAndInsertScalar128x3((Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) value, byte index, ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) LoadVectorAndInsertScalar128x3((Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) value, byte index, float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) LoadVectorAndInsertScalar128x3((Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) value, byte index, double* address);

        // LD3R
        // Proposed LoadAndReplicateToVector128x3 overloads: each takes an element pointer and
        // returns a tuple of three Vector128<T> values. Three-register counterpart of the
        // existing LoadAndReplicateToVector128 (LD1R). One overload per element type.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) LoadAndReplicateToVector128x3(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) LoadAndReplicateToVector128x3(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) LoadAndReplicateToVector128x3(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) LoadAndReplicateToVector128x3(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) LoadAndReplicateToVector128x3(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) LoadAndReplicateToVector128x3(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) LoadAndReplicateToVector128x3(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) LoadAndReplicateToVector128x3(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) LoadAndReplicateToVector128x3(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) LoadAndReplicateToVector128x3(double* address);

        // LD4 (multiple structures)
        // Proposed LoadVector128x4 overloads: each takes an element pointer and returns a
        // tuple of four Vector128<T> values (Value1 .. Value4). One overload per element
        // type: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) LoadVector128x4(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) LoadVector128x4(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) LoadVector128x4(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) LoadVector128x4(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) LoadVector128x4(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) LoadVector128x4(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) LoadVector128x4(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) LoadVector128x4(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) LoadVector128x4(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVector128x4(double* address);

        // LD4 (single structure)
        // Proposed LoadVectorAndInsertScalar128x4 overloads: each takes an existing quadruple
        // of vectors (value), a byte index and an element pointer, and returns a quadruple of
        // Vector128<T>. One overload per element type (byte through double).
        // NOTE(review): presumably `index` selects the lane to overwrite and must be a
        // constant within the lane count of T - confirm.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) LoadVectorAndInsertScalar128x4((Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) value, byte index, byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) LoadVectorAndInsertScalar128x4((Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) value, byte index, sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) LoadVectorAndInsertScalar128x4((Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) value, byte index, short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) LoadVectorAndInsertScalar128x4((Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) value, byte index, ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) LoadVectorAndInsertScalar128x4((Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) value, byte index, int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) LoadVectorAndInsertScalar128x4((Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) value, byte index, uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) LoadVectorAndInsertScalar128x4((Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) value, byte index, long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) LoadVectorAndInsertScalar128x4((Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) value, byte index, ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) LoadVectorAndInsertScalar128x4((Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) value, byte index, float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadVectorAndInsertScalar128x4((Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value, byte index, double* address);

        // LD4R
        // Proposed LoadAndReplicateToVector128x4 overloads: each takes an element pointer and
        // returns a tuple of four Vector128<T> values. Four-register counterpart of the
        // existing LoadAndReplicateToVector128 (LD1R). One overload per element type.
        public static unsafe (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) LoadAndReplicateToVector128x4(byte*   address);
        public static unsafe (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) LoadAndReplicateToVector128x4(sbyte*  address);
        public static unsafe (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) LoadAndReplicateToVector128x4(short*  address);
        public static unsafe (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) LoadAndReplicateToVector128x4(ushort* address);
        public static unsafe (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) LoadAndReplicateToVector128x4(int*    address);
        public static unsafe (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) LoadAndReplicateToVector128x4(uint*   address);
        public static unsafe (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) LoadAndReplicateToVector128x4(long*   address);
        public static unsafe (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) LoadAndReplicateToVector128x4(ulong*  address);
        public static unsafe (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) LoadAndReplicateToVector128x4(float*  address);
        public static unsafe (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) LoadAndReplicateToVector128x4(double* address);

        // ST1 (multiple structures)
        // Store already present

        // ST1 (multiple structures) 2 register variant
        // Proposed StoreVector128x2AndUnzip overloads: each takes a destination element
        // pointer and a tuple of two Vector128<T> values, and returns void. One overload per
        // element type: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        // NOTE(review): per the Arm ISA, ST1 stores registers without interleaving, so the
        // "AndUnzip" suffix may be misleading here - confirm the intended naming.
        public static unsafe void StoreVector128x2AndUnzip(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2) value);
        public static unsafe void StoreVector128x2AndUnzip(double* address, (Vector128<double> Value1, Vector128<double> Value2) value);

        // ST1 (multiple structures) 3 register variant
        // Proposed StoreVector128x3AndUnzip overloads: each takes a destination element
        // pointer and a tuple of three Vector128<T> values, and returns void. One overload
        // per element type (byte through double).
        public static unsafe void StoreVector128x3AndUnzip(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) value);
        public static unsafe void StoreVector128x3AndUnzip(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) value);
        
        // ST1 (multiple structures) 4 register variant
        // Proposed StoreVector128x4AndUnzip overloads: each takes a destination element
        // pointer and a tuple of four Vector128<T> values, and returns void. One overload
        // per element type (byte through double).
        public static unsafe void StoreVector128x4AndUnzip(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) value);
        public static unsafe void StoreVector128x4AndUnzip(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value);

        // ST1 (single structure)
        // StoreSelectedScalar already present
        
        // ST2 (multiple structures)
        // Proposed StoreVector128x2 overloads: each takes a destination element pointer and a
        // tuple of two Vector128<T> values, and returns void. One overload per element type:
        // byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        public static unsafe void StoreVector128x2(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2) value);
        public static unsafe void StoreVector128x2(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) value);
        public static unsafe void StoreVector128x2(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2) value);
        public static unsafe void StoreVector128x2(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2) value);
        public static unsafe void StoreVector128x2(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2) value);
        public static unsafe void StoreVector128x2(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2) value);
        public static unsafe void StoreVector128x2(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2) value);
        public static unsafe void StoreVector128x2(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2) value);
        public static unsafe void StoreVector128x2(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2) value);
        public static unsafe void StoreVector128x2(double* address, (Vector128<double> Value1, Vector128<double> Value2) value);

        // ST2 (single structure)
        // Proposed StoreSelectedScalar128x2 overloads: each takes a destination element
        // pointer, a tuple of two Vector128<T> values and a byte index, and returns void.
        // Two-register counterpart of the existing StoreSelectedScalar (ST1 single structure).
        // NOTE(review): presumably `index` selects the lane stored from each vector and must
        // be a constant within the lane count of T - confirm.
        public static unsafe void StoreSelectedScalar128x2(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2) value, byte index);
        public static unsafe void StoreSelectedScalar128x2(double* address, (Vector128<double> Value1, Vector128<double> Value2) value, byte index);

        // ST3 (multiple structures)
        // Proposed StoreVector128x3 overloads: each takes a destination element pointer and a
        // tuple of three Vector128<T> values, and returns void. One overload per element
        // type: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        public static unsafe void StoreVector128x3(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) value);
        public static unsafe void StoreVector128x3(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) value);
        public static unsafe void StoreVector128x3(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) value);
        public static unsafe void StoreVector128x3(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) value);
        public static unsafe void StoreVector128x3(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) value);
        public static unsafe void StoreVector128x3(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) value);
        public static unsafe void StoreVector128x3(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) value);
        public static unsafe void StoreVector128x3(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) value);
        public static unsafe void StoreVector128x3(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) value);
        public static unsafe void StoreVector128x3(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) value);

        // ST3 (single structure)
        // Proposed StoreSelectedScalar128x3 overloads: each takes a destination element
        // pointer, a tuple of three Vector128<T> values and a byte index, and returns void.
        // Three-register counterpart of the existing StoreSelectedScalar (ST1 single structure).
        // NOTE(review): presumably `index` selects the lane stored from each vector and must
        // be a constant within the lane count of T - confirm.
        public static unsafe void StoreSelectedScalar128x3(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3) value, byte index);
        public static unsafe void StoreSelectedScalar128x3(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3) value, byte index);

        // ST4 (multiple structures)
        // Proposed StoreVector128x4 overloads: each takes a destination element pointer and a
        // tuple of four Vector128<T> values, and returns void. One overload per element
        // type: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
        public static unsafe void StoreVector128x4(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) value);
        public static unsafe void StoreVector128x4(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) value);
        public static unsafe void StoreVector128x4(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) value);
        public static unsafe void StoreVector128x4(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) value);
        public static unsafe void StoreVector128x4(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) value);
        public static unsafe void StoreVector128x4(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) value);
        public static unsafe void StoreVector128x4(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) value);
        public static unsafe void StoreVector128x4(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) value);
        public static unsafe void StoreVector128x4(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) value);
        public static unsafe void StoreVector128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value);

        // ST4 (single structure)
        // Proposed StoreSelectedScalar128x4 overloads: each takes a destination element
        // pointer, a tuple of four Vector128<T> values and a byte index, and returns void.
        // Four-register counterpart of the existing StoreSelectedScalar (ST1 single structure).
        // NOTE(review): presumably `index` selects the lane stored from each vector and must
        // be a constant within the lane count of T - confirm.
        public static unsafe void StoreSelectedScalar128x4(byte*   address, (Vector128<byte>   Value1, Vector128<byte>   Value2, Vector128<byte>   Value3, Vector128<byte>   Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(sbyte*  address, (Vector128<sbyte>  Value1, Vector128<sbyte>  Value2, Vector128<sbyte>  Value3, Vector128<sbyte>  Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(short*  address, (Vector128<short>  Value1, Vector128<short>  Value2, Vector128<short>  Value3, Vector128<short>  Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(ushort* address, (Vector128<ushort> Value1, Vector128<ushort> Value2, Vector128<ushort> Value3, Vector128<ushort> Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(int*    address, (Vector128<int>    Value1, Vector128<int>    Value2, Vector128<int>    Value3, Vector128<int>    Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(uint*   address, (Vector128<uint>   Value1, Vector128<uint>   Value2, Vector128<uint>   Value3, Vector128<uint>   Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(long*   address, (Vector128<long>   Value1, Vector128<long>   Value2, Vector128<long>   Value3, Vector128<long>   Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(ulong*  address, (Vector128<ulong>  Value1, Vector128<ulong>  Value2, Vector128<ulong>  Value3, Vector128<ulong>  Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(float*  address, (Vector128<float>  Value1, Vector128<float>  Value2, Vector128<float>  Value3, Vector128<float>  Value4) value, byte index);
        public static unsafe void StoreSelectedScalar128x4(double* address, (Vector128<double> Value1, Vector128<double> Value2, Vector128<double> Value3, Vector128<double> Value4) value, byte index);
    }
}

API Usage

// Load a pair of Vector128 values from `address` in a single call
var v = LoadVector128x2(address);

// Read the two vectors back out of the returned tuple. Item1/Item2 are the positional
// names; the proposal also names the elements Value1/Value2, which work equally well.
Console.WriteLine(v.Item1);
Console.WriteLine(v.Item2);

Alternative Designs

No response

Risks

No response

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions