Home > Blockchain >  How do I specify a deallocator for memory allocated with mem_align in Swift?
How do I specify a deallocator for memory allocated with mem_align in Swift?

Time:01-04

I am creating paged aligned memory with memory_align, then I create a MTLBuffer from that with no copy. The GPU then blits data into that MTLBuffer. When that completes, I wrap that same memory in Data with Data.init(bytesNoCopy:count:deallocator:) to pass on in my project. I don't know what to use as the deallocator. I translating this code from an Apple tutorial written in OBJ-C. The Apple code is here. I spent two days trying to understand this researching myself.

The Apple OBJ-C code deallocator looks like this. This is beyond my OBJ-C knowledge.

// Block to dealloc memory created with vm_allocate
   void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
                ^(void *bytes, NSUInteger length)
                {
                    vm_deallocate((vm_map_t)mach_task_self(),
                                  (vm_address_t)bytes,
                                  length);
                };

The code in question is towards the end of my listing.

  // Blit all positions and velocities and provide them to the client either to show final results
  // or continue the simulation on another device
  func provideFullData(
    _ dataProvider: AAPLFullDatasetProvider,
    forSimulationTime time: CFAbsoluteTime
  ) {
    let positionDataSize = positions[oldBufferIndex]!.length
    let velocityDataSize = velocities[oldBufferIndex]!.length
    var positionDataAddress: UnsafeMutableRawPointer? = nil
    var velocityDataAddress: UnsafeMutableRawPointer? = nil

// Create buffers to transfer data to client
do {
  
  // allocate memory on page aligned addresses use by both GPU and CPU
  let alignment = 0x4000
  
  // make length a mulitple of alignment
  let positionAllocationSize = (positionDataSize   alignment - 1) & (~(alignment - 1))
  posix_memalign(&positionDataAddress, alignment, positionAllocationSize)
  
  let velocityAllocationSize = (velocityDataSize   alignment - 1) & (~(alignment - 1))
  posix_memalign(&positionDataAddress, alignment, velocityAllocationSize)
}

// Blit positions and velocities to a buffer for transfer
do {
  // create MTL buffers with created mem allighed
  let positionBuffer = device.makeBuffer(
    bytesNoCopy: &positionDataAddress,
    length: positionDataSize,
    options: .storageModeShared,
    deallocator: nil)
  
  positionBuffer?.label = "Final Positions Buffer"
  
  let velocityBuffer = device.makeBuffer(
    bytesNoCopy: &velocityDataAddress,
    length: velocityDataSize,
    options: .storageModeShared,
    deallocator: nil)
  
  velocityBuffer?.label = "Final Velocities Buffer"
  
  let commandBuffer = commandQueue?.makeCommandBuffer()
  commandBuffer?.label = "Full Transfer Command Buffer"
  
  let blitEncoder = commandBuffer?.makeBlitCommandEncoder()
  
  blitEncoder?.label = "Full Transfer Blits"
  
  blitEncoder?.pushDebugGroup("Full Position Data Blit")
  
  if let _position = positions[oldBufferIndex], let positionBuffer {
    blitEncoder?.copy(
      from: _position,
      sourceOffset: 0,
      to: positionBuffer,
      destinationOffset: 0,
      size: positionBuffer.length)
  }
  
  blitEncoder?.popDebugGroup()
  
  blitEncoder?.pushDebugGroup("Full Velocity Data Blit")
  
  if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
    blitEncoder?.copy(
      from: _velocity,
      sourceOffset: 0,
      to: velocityBuffer,
      destinationOffset: 0,
      size: velocityBuffer.length)
  }
  
  blitEncoder?.popDebugGroup()
  
  blitEncoder?.endEncoding()
  
  commandBuffer?.commit()
  
  // Ensure blit of data is complete before providing
  // the data to the client
  commandBuffer?.waitUntilCompleted()
}

// Wrap the memory allocated with vm_allocate
// with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the
// memory's lifetime. Initialize NSData object
// with a deallocation block to free the
// vm_allocated memory when the object has been
// deallocated
do {
   //this code was in obj-c I don'tlnow how to convert this to swift
   // Block to dealloc memory created with vm_allocate
   // let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
   // { bytes, length in
   // vm_deallocate(
   // mach_task_self() as? vm_map_t,
   // bytes as? vm_address_t,
   // length)
   // }
  let positionData = Data(
    bytesNoCopy: &positionDataAddress,
    count: positionDataSize,
    deallocator: .none) // this may be a memory leak
  
  let velocityData = Data(
    bytesNoCopy: &velocityDataAddress,
    count: velocityDataSize,
    deallocator: .none) // this may be a memory leak
  
  dataProvider(positionData, velocityData, time)
}

}

Here is the listing for the Apple OBJ-C code

// Set the initial positions and velocities of the simulation based upon the simulation's config
- (void)initializeData
{
    const float pscale = _config->clusterScale;
    const float vscale = _config->velocityScale * pscale;
    const float inner  = 2.5f * pscale;
    const float outer  = 4.0f * pscale;
    const float length = outer - inner;

    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
    vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;

    for(int i = 0; i < _config->numBodies; i  )
    {
        vector_float3 nrpos    = generate_random_normalized_vector(-1.0, 1.0, 1.0);
        vector_float3 rpos     = generate_random_vector(0.0, 1.0);
        vector_float3 position = nrpos * (inner   (length * rpos));

        positions[i].xyz = position;
        positions[i].w = 1.0;

        vector_float3 axis = {0.0, 0.0, 1.0};

        float scalar = vector_dot(nrpos, axis);

        if((1.0f - scalar) < 1e-6)
        {
            axis.xy = nrpos.yx;

            axis = vector_normalize(axis);
        }

        vector_float3 velocity = vector_cross(position, axis);

        velocities[i].xyz = velocity * vscale;
    }

    NSRange fullRange;
    fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
    [_positions[_oldBufferIndex] didModifyRange:fullRange];
    fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
    [_velocities[_oldBufferIndex] didModifyRange:fullRange];
}

/// Set simulation data for a simulation that was begun elsewhere (i.e. on another device)
- (void)setPositionData:(nonnull NSData *)positionData
           velocityData:(nonnull NSData *)velocityData
      forSimulationTime:(CFAbsoluteTime)simulationTime
{
    _oldBufferIndex = 0;
    _newBufferIndex = 1;

    vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
    vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;

    assert(_positions[_oldBufferIndex].length == positionData.length);
    assert(_velocities[_oldBufferIndex].length == velocityData.length);

    memcpy(positions, positionData.bytes, positionData.length);
    memcpy(velocities, velocityData.bytes, velocityData.length);

    NSRange fullRange;
    fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
    [_positions[_oldBufferIndex] didModifyRange:fullRange];
    fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
    [_velocities[_oldBufferIndex] didModifyRange:fullRange];

    _simulationTime = simulationTime;
}

/// Blit a subset of the positions data for this frame and provide them to the client
/// to show a summary of the simulation's progress
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
                        usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
    id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
    blitEncoder.label = @"Position Update Blit Encoder";

    [blitEncoder pushDebugGroup:@"Position Update Blit Commands"];

    [blitEncoder copyFromBuffer:buffer
                   sourceOffset:0
                       toBuffer:_updateBuffer[_currentBufferIndex]
              destinationOffset:0
                           size:_updateBuffer[_currentBufferIndex].length];

    [blitEncoder popDebugGroup];

    [blitEncoder endEncoding];
}

/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
      forSimulationTime:(CFAbsoluteTime)time
{
    NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
    NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;
    void *positionDataAddress = NULL;
    void *velocityDataAddress = NULL;

    // Create buffers to transfer data to client
    {
        // Use vm allocate to allocate buffer on page aligned address
        kern_return_t err;
      
        err = vm_allocate((vm_map_t)mach_task_self(),
                          (vm_address_t*)&positionDataAddress,
                          positionDataSize,
                          VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);

        err = vm_allocate((vm_map_t)mach_task_self(),
                          (vm_address_t*)&velocityDataAddress,
                          velocityDataSize,
                          VM_FLAGS_ANYWHERE);
        assert(err == KERN_SUCCESS);
    }

    // Blit positions and velocities to a buffer for transfer
    {
        id<MTLBuffer> positionBuffer = [_device newBufferWithBytesNoCopy:positionDataAddress
                                                                  length:positionDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];

        positionBuffer.label = @"Final Positions Buffer";

        id<MTLBuffer> velocityBuffer = [_device newBufferWithBytesNoCopy:velocityDataAddress
                                                                  length:velocityDataSize
                                                                 options:MTLResourceStorageModeShared
                                                             deallocator:nil];

        velocityBuffer.label = @"Final Velocities Buffer";

        id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
        commandBuffer.label = @"Full Transfer Command Buffer";

        id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];

        blitEncoder.label = @"Full Transfer Blits";

        [blitEncoder pushDebugGroup:@"Full Position Data Blit"];

        [blitEncoder copyFromBuffer:_positions[_oldBufferIndex]
                       sourceOffset:0
                           toBuffer:positionBuffer
                  destinationOffset:0
                               size:positionBuffer.length];

        [blitEncoder popDebugGroup];

        [blitEncoder pushDebugGroup:@"Full Velocity Data Blit"];

        [blitEncoder copyFromBuffer:_velocities[_oldBufferIndex]
                       sourceOffset:0
                           toBuffer:velocityBuffer
                  destinationOffset:0
                               size:velocityBuffer.length];

        [blitEncoder popDebugGroup];

        [blitEncoder endEncoding];

        [commandBuffer commit];

        // Ensure blit of data is complete before providing the data to the client
        [commandBuffer waitUntilCompleted];
    }

    // Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
    // rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
    // with a deallocation block to free the vm_allocated memory when the object has been
    // deallocated
    {
        // Block to dealloc memory created with vm_allocate
        void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
            ^(void *bytes, NSUInteger length)
            {
                vm_deallocate((vm_map_t)mach_task_self(),
                              (vm_address_t)bytes,
                              length);
            };

        NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
                                                            length:positionDataSize
                                                       deallocator:deallocProvidedAddress];

        NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
                                                            length:velocityDataSize
                                                       deallocator:deallocProvidedAddress];

        dataProvider(positionData, velocityData, time);
    }
}

CodePudding user response:

You define the deallocation block (or even a named function), similar to the way its done in Obj-C, though some casting is needed. The Obj-C deallocator block becomes the following closure in Swift:

        let deallocProvidedAddress = {
            (_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
            vm_deallocate(mach_task_self_, vm_offset_t(bitPattern: bytes), vm_size_t(length))
        }

Then instead of .none for the deallocator parameter for Data(bytesNoCopy:count:deallocator), you pass .custom(deallocProvidedAddress).

        let positionData = Data(
            bytesNoCopy: &positionDataAddress,
            count: positionDataSize,
            deallocator: .custom(deallocProvidedAddress))
        
        let velocityData = Data(
            bytesNoCopy: &velocityDataAddress,
            count: velocityDataSize,
            deallocator: .custom(deallocProvidedAddress))
        
        dataProvider(positionData, velocityData, time)

However, since you don't call vm_allocate, but instead use posix_memalign, you'd need to call free instead of vm_deallocate in deallocProvidedAddress:

        let deallocProvidedAddress = {
            (_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
            free(bytes)
        }

How did I know to use free? Having never actually used posix_memalign myself, I just did man posix_memalign in Terminal, and it says, among other things:

Memory that is allocated via posix_memalign() can be used as an argument in subsequent calls to realloc(3), reallocf(3), and free(3).

So free is the appropriate way to deallocate memory allocated via posix_memalign

This is my translation of the Obj-C version of provideFullData into Swift. It uses vm_allocate and vm_deallocate since that's what the Obj-C version does, but you can easily replace that with posix_memalign and free, if you like:

    /// Blit all positions and velocities and provide them to the client either to show final results
    /// or continue the simulation on another device
    func provide(fullData dataProvider: AAPLFullDatasetProvider, forSimulationTime time: CFAbsoluteTime)
    {
        let positionDataSize = positions[oldBufferIndex]!.length
        let velocityDataSize = velocities[oldBufferIndex]!.length
        
        func vm_alloc(count: Int) -> UnsafeMutableRawPointer?
        {
            var address: vm_address_t = 0
            let err = vm_allocate(mach_task_self_, &address, vm_size_t(count), VM_FLAGS_ANYWHERE)
            return err == KERN_SUCCESS
                ? UnsafeMutableRawPointer(bitPattern: address)
                : nil
        }
        
        func makeMTLBuffer(
            from bytes: UnsafeMutableRawPointer,
            count: Int,
            labeled label: String) -> MTLBuffer?
        {
            guard let buffer = device.makeBuffer(
                bytesNoCopy: bytes,
                length: count,
                options: [.storageModeShared],
                deallocator: nil)
            else { return nil }
            
            buffer.label = label
            return buffer
        }
        
        guard let positionDataAddress = vm_alloc(count: positionDataSize) else {
            fatalError("failed to allocate position data")
        }
        guard let velocityDataAddress = vm_alloc(count: velocityDataSize) else {
            fatalError("failed to allocate velocity data")
        }

        // Blit positions and velocities to a buffer for transfer
        guard let positionBuffer = makeMTLBuffer(
                from: positionDataAddress,
                count: positionDataSize,
                labeled: "Final Positions Buffer")
        else { fatalError("Failed to allocate positions MTLBuffer") }
        
        guard let velocityBuffer = makeMTLBuffer(
                from: velocityDataAddress,
                count: velocityDataSize,
                labeled: "Final Velocities Buffer")
        else { fatalError("Failed to allocate velocities MTLBuffer") }
        
        guard let commandBuffer = commandQueue.makeCommandBuffer() else {
            fatalError("Failed to make commandBuffer")
        }
        commandBuffer.label = "Full Transfer Command Buffer"


        guard let blitEncoder = commandBuffer.makeBlitCommandEncoder() else {
            fatalError("Failed to make blitEncoder")
        }
        blitEncoder.label = "Full Transfer Blits"

        blitEncoder.pushDebugGroup("Full Position Data Blit")
        blitEncoder.copy(
            from: positions[oldBufferIndex]!,
            sourceOffset: 0,
            to: positionBuffer,
            destinationOffset: 0,
            size: positionBuffer.length
        )
        blitEncoder.popDebugGroup()

        blitEncoder.pushDebugGroup("Full Velocity Data Blit")
        blitEncoder.copy(
            from: velocities[oldBufferIndex]!,
            sourceOffset: 0,
            to: velocityBuffer,
            destinationOffset: 0,
            size: velocityBuffer.length
        )
        blitEncoder.popDebugGroup()
        
        blitEncoder.endEncoding()
        
        commandBuffer.commit()

        // Ensure blit of data is complete before providing the data to the client
        commandBuffer.waitUntilCompleted()


        // Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
        // rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
        // with a deallocation block to free the vm_allocated memory when the object has been
        // deallocated
        // Block to dealloc memory created with vm_allocate
        let deallocProvidedAddress =
        { (_ bytes: UnsafeMutableRawPointer, _ length: Int) -> Void in
            vm_deallocate(
                mach_task_self_,
                vm_offset_t(bitPattern: bytes),
                vm_size_t(length)
            )
        }

        let positionData = Data(
            bytesNoCopy: positionDataAddress,
            count: positionDataSize,
            deallocator: .custom(deallocProvidedAddress))
        
        let velocityData = Data(
            bytesNoCopy: velocityDataAddress,
            count: velocityDataSize,
            deallocator: .custom(deallocProvidedAddress))
        
        dataProvider(positionData, velocityData, time)
    }

I see lots of opportunities for refactoring here (I already did a little bit). If you do something other than fatalError in the "sad" path, don't forget that you need to deallocate positionDataAddress and velocityDataAddress before returning or throwing. I would at least refactor it so that each Data instance is made immediately after its successful vm_allocate/posix_memalign instead of waiting until the very end of the method, that way, in case of errors, clean up can happen automatically. I'd also extract all the Metal blit code into it's own function.

  • Related