How to merge 1 video and 2 or more audio files with AVFoundation-CodePudding

I am trying to create a video by merging one ".mov" file with two or more ".caf" files. My code works fine when merging one ".mov" file with one ".caf" file, and it neither crashes nor throws an error when I add more ".caf" files; however, in that case only the first audio can be heard. I have tried inserting the audio clips at different times, and two of them at the same time, but the result is the same: only one audio is audible.

If I try to merge the audio of the video the same thing happens, only the first audio can be heard (the video's audio).

Here is my code:

        /// <summary>
        /// Builds a composition from the video at <c>FinalVideo</c> plus one audio track per
        /// entry in <c>TTS_list</c>, then starts an asynchronous export to
        /// "temporaryClip/Whole2.mov" inside the temp directory. <c>_OnExportDone</c> is passed
        /// to the export session as its completion handler.
        /// </summary>
        public void mergeAudios()
        {
            //This function merges the final video with the new audio
            AVMutableComposition mixComposition = new AVMutableComposition();

            #region HoldVideoTrack
            AVAsset video_asset = AVAsset.FromUrl(NSUrl.FromFilename(FinalVideo));

            // CMTime.Zero rather than new CMTime(0, 0): the latter has a zero timescale (denominator).
            CMTimeRange range = new CMTimeRange()
            {
                Start = CMTime.Zero,
                Duration = video_asset.Duration
            };

            AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
            AVAssetTrack assetVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Video)[0];
            if (!videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error1))
            {
                System.Diagnostics.Debug.WriteLine($"mergeAudios: inserting the video track failed: {error1?.LocalizedDescription}");
            }

            try
            {
                CMTime prevDur = CMTime.Zero;
                foreach (Audio _audio in TTS_list)
                {
                    AVAsset audio_asset = AVAsset.FromUrl(NSUrl.FromFilename(_audio.Path));

                    // Alternative insertion point derived from the clip's Starting_Point.
                    // It is computed but not used; prevDur is what gets passed below.
                    CMTime aaa = new CMTime((long)(_audio.Starting_Point * Convert.ToDouble(mixComposition.Duration.TimeScale)), mixComposition.Duration.TimeScale);

                    // Each clip gets its own audio track in the composition.
                    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);

                    AVAssetTrack assetAudioTrack = audio_asset.TracksWithMediaType(AVMediaType.Audio)[0];
                    if (!audioTrack.InsertTimeRange(_audio.Range, assetAudioTrack, prevDur /*aaa*/, out NSError error3))
                    {
                        System.Diagnostics.Debug.WriteLine($"mergeAudios: inserting audio '{_audio.Path}' failed: {error3?.LocalizedDescription}");
                    }

                    // NOTE(review): prevDur is overwritten with this clip's duration, not accumulated,
                    // so the next clip is inserted at that offset rather than after all previous
                    // clips. Confirm whether sequential placement (or Starting_Point) is intended.
                    prevDur = _audio.Range.Duration;
                }
            }
            catch (Exception error)
            {
                // Previously swallowed silently; report it so a failed merge can be diagnosed.
                System.Diagnostics.Debug.WriteLine($"mergeAudios: adding audio tracks failed: {error}");
            }
            #endregion


            #region Instructions
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[1];
            Instruction_Array[0] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);
            #endregion

            // 6
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            // The single instruction spans the whole composition.
            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = CMTime.Zero,
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(0.63f, 0.84f, 0.82f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreenWidth, UIScreenHeight)
            };

            //... export video ...

            // Build the output URL once and reuse it for both the field and the export session.
            pathh = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole2.mov"));
            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = pathh,
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

If you need any more info I will provide it as soon as I see your request. Thank you all for your time, have a nice day.

CodePudding user response：

Ok, I just found what the problem was; basically, there is one golden rule that must be followed when using AVMutableComposition (at least to merge multiple audios), that is:

1 audio = 1 video + 1 instruction

In other words, for every audio, there must be 1 video and 1 instruction. Following this rule my previous code results in the following:

public void mergeAudios()
        {
            // Merges the video at FinalVideo with its own audio (when available) and with every
            // clip in TTS_list, then starts an asynchronous export to
            // "temporaryClip/FinalVideoEdit.mov" in the temp directory. _OnExportDone is passed
            // to the export session as its completion handler.

            #region HoldVideoTrack
            AVAsset video_asset = AVAsset.FromUrl(NSUrl.FromFilename(FinalVideo));

            //This range applies to the source video, not to the mixcomposition
            CMTimeRange range = new CMTimeRange()
            {
                Start = CMTime.Zero,
                Duration = video_asset.Duration
            };
            #endregion

            AVMutableComposition mixComposition = new AVMutableComposition();

            #region AddsVideo
            AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
            AVAssetTrack assetVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Video)[0];
            if (!videoTrack.InsertTimeRange(range, assetVideoTrack, CMTime.Zero, out NSError error1))
            {
                System.Diagnostics.Debug.WriteLine($"mergeAudios: inserting the video track failed: {error1?.LocalizedDescription}");
            }
            #endregion

            #region AddsVideo'sAudio
            //If the device can't use the microphone then the original video's audio will not exist
            AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);
            AVAssetTrack[] videoAudioTracks = video_asset.TracksWithMediaType(AVMediaType.Audio);
            // Also require an actual audio track so indexing [0] cannot go out of range.
            if (microphone != null && videoAudioTracks.Length > 0)
            {
                AVMutableCompositionTrack audio_video_Track = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                // Insert at time zero so the video's own audio lines up with its picture.
                // mixComposition.Duration already equals the video length at this point.
                if (!audio_video_Track.InsertTimeRange(range, videoAudioTracks[0], CMTime.Zero, out NSError error2))
                {
                    System.Diagnostics.Debug.WriteLine($"mergeAudios: inserting the video's audio failed: {error2?.LocalizedDescription}");
                }
            }
            #endregion

            //[TTS_list.Count + 1]; +1 = original Video
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[TTS_list.Count + 1];
            //This instruction is for "FinalVideo"
            Instruction_Array[0] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);


            #region TestingEnviroment
            //Zero-length source range used for the placeholder video track that accompanies each audio
            CMTimeRange range0 = new CMTimeRange()
            {
                Start = CMTime.Zero,
                Duration = CMTime.FromSeconds(0, 600)
            };

            //counter is the position in Instruction_Array; it starts at 1 because slot 0 already holds the instruction for "FinalVideo"
            int counter = 1;
            foreach (Audio _audioo in TTS_list)
            {
                #region Video
                //One placeholder video track is declared per audio. Its inserted range has duration 0,
                //so it is not meant to contribute any picture to the final video.
                AVMutableCompositionTrack videoTrack_forAudio = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
                videoTrack_forAudio.InsertTimeRange(range0, assetVideoTrack, mixComposition.Duration, out NSError error4);
                #endregion

                #region Audio
                AVAsset audio_asset = AVAsset.FromUrl(NSUrl.FromFilename(_audioo.Path));

                //This range applies to the source audio, not to the mixcomposition
                //We use _audioo.Duration instead of audio_asset.Duration.Seconds because the audio's duration might be trimmed
                CMTimeRange audio_CMTime = new CMTimeRange()
                {
                    Start = CMTime.Zero,
                    Duration = CMTime.FromSeconds(_audioo.Duration, 600)
                };

                //This time applies to the mixcomposition: where the clip starts in the final video
                var starting_CMTime = CMTime.FromSeconds(_audioo.Starting_Point, 600);

                AVAssetTrack[] clipAudioTracks = audio_asset.TracksWithMediaType(AVMediaType.Audio);
                if (clipAudioTracks.Length > 0)
                {
                    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                    if (!audioTrack.InsertTimeRange(audio_CMTime, clipAudioTracks[0], starting_CMTime, out NSError error5))
                    {
                        System.Diagnostics.Debug.WriteLine($"mergeAudios: inserting audio '{_audioo.Path}' failed: {error5?.LocalizedDescription}");
                    }
                }
                else
                {
                    System.Diagnostics.Debug.WriteLine($"mergeAudios: '{_audioo.Path}' has no audio track; skipped");
                }
                #endregion

                #region Instruction
                // NOTE(review): this instruction is built for videoTrack, not videoTrack_forAudio,
                // so every slot targets the main video track. Kept as in the original; confirm intent.
                Instruction_Array[counter] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);
                counter += 1;
                #endregion
            }
            #endregion


            #region Instructions
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            // The single instruction spans the whole composition.
            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = CMTime.Zero,
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(0.63f, 0.84f, 0.82f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;
            #endregion

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreenWidth, UIScreenHeight)
            };

            // Build the output path once; remove any previous export at that path before exporting.
            string outputPath = Path.Combine(Path.GetTempPath(), "temporaryClip/FinalVideoEdit.mov");
            finalVideo_path = NSUrl.FromFilename(outputPath);
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = finalVideo_path,
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }