# AutoDub/main.py at main · cranberis/AutoDub · GitHub

import multiprocessing
import os
import pickle
import sys

from tools.audio_synthesis import *
from tools.transcriber import *
from tools.audio_splitter_ffmpeg import *
from tools.video_downloader import *
from tools.video_editing import *

def translate_segments(segments, target_language, source_language, translate=None):
    """Return translated copies of transcription segments.

    Each output dict carries over the 'id', 'seek', 'start' and 'end' values
    of the corresponding input segment and replaces 'text' with its
    translation. Segments whose text is empty are not sent to the translator
    and keep an empty string. The input segments are not modified.

    A progress line is printed after every segment.

    Args:
        segments: Sized iterable of dicts with the keys 'id', 'seek',
            'start', 'end' and 'text'.
        target_language: Language code to translate into.
        source_language: Language code of the original text.
        translate: Callable invoked as
            ``translate(text, target_language, source_language)``.
            Defaults to ``translate_deepl``, looked up at call time.

    Returns:
        A new list of segment dicts, in the same order as ``segments``.
    """
    if translate is None:
        translate = translate_deepl

    total_segments = len(segments)
    translated = []
    for done, segment in enumerate(segments, start=1):
        text = segment['text']
        translated.append({
            'id': segment['id'],
            'seek': segment['seek'],
            'start': segment['start'],
            'end': segment['end'],
            # Skip the translator entirely for empty text.
            'text': translate(text, target_language, source_language) if text else '',
        })
        percentage = done / total_segments * 100
        print(f'Translation progress: {percentage:.2f}%')
    return translated


def clear_directory(directory):
    """Delete every regular file directly inside ``directory``.

    Subdirectories (and anything inside them) are left untouched.
    """
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path):
            os.remove(file_path)


def main():
    """Run the whole pipeline for one video URL entered by the user.

    Steps, in order: download the video, extract and transcribe its audio,
    split the audio, translate the transcript, synthesize speech for the
    translated segments, fit the video to the synthesized audio, and finally
    remove the files left in 'output_audio'.
    """
    # Single source of truth for the target language: used for translation,
    # speech synthesis and the output file name.
    target_lang_code = 'es'

    # Prepare folders
    for folder in ('downloads', 'original_audios', 'output_audio',
                   'final_output', 'logs'):
        os.makedirs(folder, exist_ok=True)

    # Set the multiprocessing start method to 'spawn'
    # NOTE(review): presumably needed by the CUDA synthesis workers — confirm.
    multiprocessing.set_start_method('spawn')

    # Download video
    url = input("Enter YouTube URL: ").strip()
    if not url:
        raise SystemExit('No URL provided.')
    video = video_download(url)

    # Extract audio
    audio = audio_extractor(video)
    original_audio_name = os.path.splitext(os.path.basename(audio))[0]

    # Transcribe audio
    segments, detected_language = transcribe(audio)
    print(f"Audio transcribed. Detected language: {detected_language}")

    # Split audio file
    # NOTE(review): the '_vocals' / '_accompaniment' files read below are
    # presumably produced by this step — confirm against the splitter module.
    process_full_audio_with_spleeter(audio)
    print("Audio split")

    # Translate each segment
    new_segments = translate_segments(segments, target_lang_code, detected_language)
    print("Audio translated")

    # Checkpoint the translated segments on disk. The list is already in
    # memory, so it is not read back here.
    with open('new_segments.pkl', 'wb') as f:
        pickle.dump(new_segments, f)
    print("New segments saved")

    # Synthesize audio
    synthesized_segments_paths = synthesize_segments_with_workers(
        segments=new_segments,
        speaker_wav_path=f"output_audio/{original_audio_name}_vocals.wav",
        target_language_code=target_lang_code,
        num_workers=3,
        device='cuda'
    )
    print("Audio synthesized")

    # Video editing
    video_output = f'final_output/{original_audio_name}-{target_lang_code}.mp4'
    background_audio = f'output_audio/{original_audio_name}_accompaniment.wav'

    adjust_video_to_synthesized_audio(
        segments=new_segments,
        synthesized_segments_paths=synthesized_segments_paths,
        video_path=video,
        background_audio_path=background_audio,
        output_video_path=video_output
    )

    # Delete all content in output_audio folder (only reached when every
    # step above succeeded).
    clear_directory('output_audio')


if __name__ == '__main__':
    main()