| import soundfile |
| import pyrubberband |
| import configparser |
| import pathlib |
| import os |
| import io |
|
|
| from Scripts.shared_imports import * |
| import Scripts.TTS as TTS |
| from Scripts.utils import parseBool |
|
|
| from pydub import AudioSegment |
| from pydub.silence import detect_leading_silence |
| import langcodes |
|
|
| |
# Folder (relative to the current working directory) where intermediate and
# debug audio files (trimmed/stretched clips) are written.
workingFolder = "workingFolder"
|
|
|
|
def trim_clip(inputSound):
    """Strip leading and trailing silence from a pydub AudioSegment.

    Args:
        inputSound: the AudioSegment to trim.

    Returns:
        A new AudioSegment with silence removed from both ends.
    """
    # (Rewritten from lambda assignments that carried misleading
    # ": AudioSegment" annotations — the names were functions, not segments.)
    def _trim_leading(sound):
        # Drop everything before the first non-silent audio.
        return sound[detect_leading_silence(sound):]

    # Trailing silence is just the leading silence of the reversed clip.
    leadingTrimmed = _trim_leading(inputSound)
    return _trim_leading(leadingTrimmed.reverse()).reverse()
|
|
| |
def insert_audio(canvas, audioToOverlay, startTimeMs):
    """Overlay a clip onto the canvas at the given start time.

    Args:
        canvas: the base AudioSegment to overlay onto.
        audioToOverlay: the clip to place on the canvas.
        startTimeMs: start position of the clip, in milliseconds
            (truncated to an int).

    Returns:
        A new AudioSegment with the clip overlaid; the input canvas is
        not modified.
    """
    position = int(startTimeMs)
    return canvas.overlay(audioToOverlay, position=position)
|
|
| |
def create_canvas(canvasDuration, frame_rate=None):
    """Create a silent AudioSegment 'canvas' to overlay clips onto.

    Args:
        canvasDuration: length of the canvas in milliseconds.
        frame_rate: sample rate of the canvas; defaults to the configured
            'synth_sample_rate'. (Previously this default was evaluated
            once at import time; it is now resolved at call time so the
            module can be imported before config is loaded and config
            changes are respected.)

    Returns:
        A silent AudioSegment of the requested duration.
    """
    if frame_rate is None:
        frame_rate = int(config['synth_sample_rate'])
    return AudioSegment.silent(duration=canvasDuration, frame_rate=frame_rate)
|
|
def get_speed_factor(subsDict, trimmedAudio, desiredDuration, num):
    """Measure how much a trimmed clip must be sped up to fit its slot.

    Loads the in-memory wav buffer, compares its real duration against the
    desired duration, and stores the ratio under subsDict[num]['speed_factor'].

    Args:
        subsDict: subtitle dictionary to record the factor into (mutated).
        trimmedAudio: seekable file-like object holding the trimmed wav.
        desiredDuration: target duration in milliseconds.
        num: key of the subtitle entry being measured.

    Returns:
        The (mutated) subsDict.
    """
    clip = AudioSegment.from_file(trimmedAudio, format="wav")
    # Rewind the buffer so later readers start from the beginning.
    trimmedAudio.seek(0)
    actualDurationMs = clip.duration_seconds * 1000
    subsDict[num]['speed_factor'] = actualDurationMs / float(desiredDuration)
    return subsDict
|
|
def stretch_audio(audioFileToStretch, speedFactor, num):
    """Time-stretch a clip by speedFactor using rubberband.

    Args:
        audioFileToStretch: path or file-like object readable by soundfile.
        speedFactor: stretch ratio (>1 speeds the clip up, shortening it).
        num: subtitle key, used only to name the debug output file.

    Returns:
        The stretched audio as a pydub AudioSegment.
    """
    samples, sampleRate = soundfile.read(audioFileToStretch)

    # '--fine' selects rubberband's high-quality R3 engine.
    stretchedSamples = pyrubberband.time_stretch(
        samples, sampleRate, speedFactor, rbargs={'--fine': '--fine'}
    )

    memoryBuffer = io.BytesIO()
    soundfile.write(memoryBuffer, stretchedSamples, sampleRate, format='wav')
    if config['debug_mode']:
        # Also keep an on-disk copy of the stretched clip for inspection.
        soundfile.write(os.path.join(workingFolder, f'{num}_s.wav'), stretchedSamples, sampleRate)

    return AudioSegment.from_file(memoryBuffer, format="wav")
|
|
|
|
# NOTE(review): duplicate import — AudioSegment is already imported at the
# top of the file; this re-import is a harmless no-op and can be removed.
from pydub import AudioSegment
|
|
def build_audio(subsDict, langDict, totalAudioLength, twoPassVoiceSynth=False):
    """Assemble and export the final dubbed audio track.

    Trims silence from each synthesized TTS clip, optionally re-synthesizes
    a second pass using measured speed factors, stretches clips to fit their
    subtitle timings, overlays them onto a silent canvas, and exports the
    result in the configured output format.

    Args:
        subsDict: dict keyed by subtitle number; each entry holds at least
            'TTS_FilePath', 'duration_ms' and 'start_ms'. Mutated in place
            (adds 'TTS_FilePath_Trimmed' and 'speed_factor').
        langDict: dict holding at least 'languageCode' of the target language.
        totalAudioLength: length of the output canvas, in milliseconds.
        twoPassVoiceSynth: if True, run a second synthesis pass using the
            measured speed factors (forced off for Azure, which applies the
            speaking rate itself).

    Returns:
        The (mutated) subsDict.

    Raises:
        ValueError: if config['output_format'] is not one of mp3/wav/aac.
    """
    # Azure TTS adjusts the speaking rate natively, so a second synthesis
    # pass is never needed for it.
    if cloudConfig['tts_service'] == 'azure':
        twoPassVoiceSynth = False

    # In-memory trimmed wav buffers, keyed the same as subsDict.
    virtualTrimmedFileDict = {}

    # ---- Pass 1: trim leading/trailing silence from each raw TTS clip ----
    for key, value in subsDict.items():
        filePathTrimmed = os.path.join(workingFolder, str(key)) + "_t.wav"
        subsDict[key]['TTS_FilePath_Trimmed'] = filePathTrimmed

        rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
        trimmedClip = trim_clip(rawClip)

        # Keep an on-disk copy of the trimmed clip when debugging.
        if config['debug_mode']:
            trimmedClip.export(filePathTrimmed, format="wav")

        tempTrimmedFile = io.BytesIO()
        trimmedClip.export(tempTrimmedFile, format="wav")
        virtualTrimmedFileDict[key] = tempTrimmedFile
        keyIndex = list(subsDict.keys()).index(key)
        print(f" Trimmed Audio: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    # ---- Measure speed factors (how much each clip must speed up to fit) ----
    if not cloudConfig['tts_service'] == 'azure':
        # BUG FIX: was `subsDict.items` (the bound method object, which is
        # not iterable) — must be called: `subsDict.items()`.
        for key, value in subsDict.items():
            subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Calculated Speed Factor: {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

    # ---- Pass 2 (optional): re-synthesize using the measured speed factors ----
    if twoPassVoiceSynth and not cloudConfig['tts_service'] == 'azure':
        # NOTE(review): 'azure' is excluded by the enclosing condition, so this
        # batch branch is currently unreachable; kept for parity with the
        # original control flow — confirm before removing.
        if cloudConfig['batch_tts_synthesize'] and cloudConfig['tts_service'] == 'azure':
            subsDict = TTS.synthesize_dictionary_batch(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)
        else:
            subsDict = TTS.synthesize_dictionary(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)

        # Re-trim the second-pass clips and overwrite the in-memory buffers.
        # BUG FIX: `.items` -> `.items()` (same defect as above).
        for key, value in subsDict.items():
            rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
            trimmedClip = trim_clip(rawClip)
            if config['debug_mode']:
                # Strip the ".wav" suffix and tag the debug copy as pass 2.
                secondPassTrimmedFile = value['TTS_FilePath_Trimmed'][:-4] + "_p2_t.wav"
                trimmedClip.export(secondPassTrimmedFile, format="wav")
            trimmedClip.export(virtualTrimmedFileDict[key], format="wav")
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Trimmed Audio (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

        if config['force_stretch_with_twopass']:
            # BUG FIX: `.items` -> `.items()` (same defect as above).
            for key, value in subsDict.items():
                subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
                keyIndex = list(subsDict.keys()).index(key)
                print(f" Calculated Speed Factor (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
            print("\n")

    # ---- Build the final track on a silent canvas ----
    canvas = create_canvas(totalAudioLength)

    for key, value in subsDict.items():
        # Stretch when there was no second pass (or stretching is forced),
        # except for Azure which already produced correctly-timed clips.
        if (not twoPassVoiceSynth or config['force_stretch_with_twopass']) and not cloudConfig['tts_service'] == 'azure':
            stretchedClip = stretch_audio(virtualTrimmedFileDict[key], speedFactor=subsDict[key]['speed_factor'], num=key)
        else:
            stretchedClip = AudioSegment.from_file(virtualTrimmedFileDict[key], format="wav")
            # Rewind so the buffer can be read again later if needed.
            virtualTrimmedFileDict[key].seek(0)

        canvas = insert_audio(canvas, stretchedClip, value['start_ms'])
        keyIndex = list(subsDict.keys()).index(key)
        print(f" Final Audio Processed: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

    # ---- Determine the output file name ----
    lang = langcodes.get(langDict['languageCode'])
    langName = langcodes.get(langDict['languageCode']).get(lang.to_alpha3()).display_name()
    if config['debug_mode'] and not os.path.isfile(ORIGINAL_VIDEO_PATH):
        outputFileName = "debug" + f" - {langName} - {langDict['languageCode']}."
    else:
        outputFileName = pathlib.Path(ORIGINAL_VIDEO_PATH).stem + f" - {langName} - {langDict['languageCode']}."
    outputFileName = os.path.join(OUTPUT_FOLDER, outputFileName)

    outputFormat = config['output_format'].lower()
    if outputFormat == "mp3":
        outputFileName += "mp3"
        formatString = "mp3"
    elif outputFormat == "wav":
        outputFileName += "wav"
        formatString = "wav"
    elif outputFormat == "aac":
        # ffmpeg's muxer for raw AAC streams is called 'adts'.
        outputFileName += "aac"
        formatString = "adts"
    else:
        # BUG FIX: previously an unknown format fell through, leaving
        # formatString unbound and crashing later with a NameError.
        raise ValueError(f"Unsupported output_format in config: {outputFormat}")

    # Force stereo output.
    canvas = canvas.set_channels(2)
    try:
        print("\nExporting audio file...")
        canvas.export(outputFileName, format=formatString, bitrate="192k")
    # BUG FIX: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; best-effort fallback behavior is preserved.
    except Exception:
        outputFileName = outputFileName + ".bak"
        canvas.export(outputFileName, format=formatString, bitrate="192k")
        print("\nThere was an issue exporting the audio, it might be a permission error. The file was saved as a backup with the extension .bak")
        print("Try removing the .bak extension then listen to the file to see if it worked.\n")
        input("Press Enter to exit...")

    return subsDict
|
|