-
Notifications
You must be signed in to change notification settings - Fork 1
/
AudioSetLoad.py
84 lines (66 loc) · 2.64 KB
/
AudioSetLoad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
ffmpeg_path = '/usr/bin/ffmpeg'
import sys
import os.path
# Make sure ffmpeg is on the path so sk-video can find it.
# sys.path only controls where Python looks for modules, so appending to it
# does not make an executable discoverable. The directory is therefore also
# added to the PATH environment variable (presumably what sk-video searches
# for the ffmpeg binary -- confirm against the sk-video documentation).
_ffmpeg_dir = os.path.dirname(ffmpeg_path)
sys.path.append(_ffmpeg_dir)  # kept so existing behaviour is unchanged
_env_path = os.environ.get('PATH', '')
if _ffmpeg_dir not in _env_path.split(os.pathsep):
    if _env_path:
        os.environ['PATH'] = _env_path + os.pathsep + _ffmpeg_dir
    else:
        os.environ['PATH'] = _ffmpeg_dir
import skvideo.io
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pafy
import soundfile as sf
import subprocess as sp
import random
# Set output settings
audio_codec = 'wav'
audio_container = 'wav'
video_codec = 'h264'
video_container = 'mp4'

# Load the AudioSet training set.
# The first three lines of the file are skipped (presumably header lines).
# Every other non-blank line is flattened into
# [ytid, start_seconds, end_seconds, label_id, label_id, ...] by splitting on
# commas and removing the quotes and spaces around each field.
with open('data/balanced_train_segments.csv') as f:
    lines = f.readlines()
dl_list = [[s.replace('\"', '').replace(' ', '') for s in line.strip().split(',')]
           for line in lines[3:] if line.strip()]

# Load the AudioSet class names.
# The rows are parsed with the csv module rather than a plain split(','):
# a quoted display name that itself contains a comma would otherwise be cut
# at that comma and keep a stray quote character. csv.reader also removes
# the surrounding quotes from quoted fields.
import csv
with open('data/class_labels_indices.csv') as f:
    lines = f.readlines()
# Skip the header row; keep the first three columns of every non-empty row.
cl_list = [row[0:3] for row in csv.reader(lines[1:]) if row]

# Class id (second column) -> display name (third column).
index_dictionary = {class_id: name for _, class_id, name in cl_list}
# YouTube id -> display names of all labels attached to that segment.
file_labelling = {seg[0]: [index_dictionary[label] for label in seg[3:]]
                  for seg in dl_list}
# Display name -> class id.
reverse_index_dictionary = {name: class_id for _, class_id, name in cl_list}
def dlfile(ytid, ts_start, ts_end):
    """Download the audio of one segment of a YouTube video with ffmpeg.

    Args:
        ytid: YouTube video id; used to build the watch-page URL and the
            output file name.
        ts_start: Segment start in seconds (anything ``float()`` accepts).
        ts_end: Segment end in seconds (anything ``float()`` accepts).

    Returns:
        The output path ``./<ytid>_<start ms>_<end ms>.wav``. The path is
        returned even when ffmpeg exits with a non-zero status; in that case
        ffmpeg's error output is printed and the file may not exist, so
        callers that need the file should check for it.
    """
    # Set output settings
    audio_codec = 'wav'

    ts_start, ts_end = float(ts_start), float(ts_end)
    duration = ts_end - ts_start

    # Get the output audio filepath (timestamps are encoded in milliseconds)
    basename_fmt = '{}_{}_{}'.format(ytid, int(ts_start * 1000), int(ts_end * 1000))
    audio_filepath = os.path.join('.', basename_fmt + '.' + audio_codec)

    # Download the audio
    # Get the URL to the video page
    video_page_url = 'https://www.youtube.com/watch?v={}'.format(ytid)

    # Get the direct URL of the best audio stream as selected by pafy
    video = pafy.new(video_page_url)
    best_audio = video.getbestaudio()
    best_audio_url = best_audio.url

    audio_dl_args = [ffmpeg_path,
                     '-ss', str(ts_start),     # The beginning of the trim window
                     '-i', best_audio_url,     # Specify the input video URL
                     '-t', str(duration),      # Specify the duration of the output
                     '-vn',                    # Suppress the video stream
                     '-ac', '2',               # Set the number of channels
                     '-y',                     # overwrite
                     '-sample_fmt', 's16',     # Specify the bit depth
                     '-ar', '44100',           # Specify the audio sample rate
                     audio_filepath]

    proc = sp.Popen(audio_dl_args, stdout=sp.PIPE, stderr=sp.PIPE)
    _, stderr = proc.communicate()
    if proc.returncode != 0:
        # stderr is captured as bytes; decode it so the ffmpeg message is
        # printed as readable text instead of a bytes repr.
        print(stderr.decode('utf-8', 'replace'))
    return audio_filepath
def dl_random_file():
    """Download the audio of one randomly chosen entry of ``dl_list``.

    Returns:
        A 3-tuple ``(audio_filepath, ytid, labels)``: the value returned by
        ``dlfile`` for the chosen segment, the segment's YouTube id, and the
        ``file_labelling`` entry for that id.
    """
    segment = random.choice(dl_list)
    # The first three fields of a segment are the id and the two timestamps.
    ytid, ts_start, ts_end = segment[:3]
    audio_filepath = dlfile(ytid, ts_start, ts_end)
    return audio_filepath, ytid, file_labelling[ytid]