Skip to content

Commit c68b459

Browse files
authored
fix the case when segments has different #lines from wav.scp (#413)
1 parent aca7c2c commit c68b459

File tree

5 files changed

+58
-16
lines changed

5 files changed

+58
-16
lines changed

.github/workflows/ci.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,41 @@ jobs:
102102
name: artifacts-${{ matrix.config }}
103103
path: egs/yesno/voc1
104104

105+
integration_segments:
106+
runs-on: ubuntu-20.04
107+
strategy:
108+
max-parallel: 10
109+
matrix:
110+
python-version: [3.9]
111+
pytorch-version: [1.13.1]
112+
steps:
113+
- uses: actions/checkout@master
114+
- uses: actions/setup-python@v2
115+
with:
116+
python-version: ${{ matrix.python-version }}
117+
architecture: 'x64'
118+
- uses: actions/cache@v2
119+
with:
120+
path: ~/.cache/pip
121+
key: ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-${{ hashFiles('**/setup.py') }}
122+
restore-keys: |
123+
${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-pip-
124+
- name: Install dependencies
125+
run: |
126+
sudo apt-get install libsndfile-dev jq
127+
# make python env
128+
cd tools; make CUDA_VERSION="" PYTHON=python${{ matrix.python-version }} PYTORCH_VERSION=${{ matrix.pytorch-version }}
129+
source venv/bin/activate
130+
pip install torch-yin
131+
- name: Integration
132+
run: |
133+
cd egs/yesno/voc1 && ./run.sh --use_fake_segments true
134+
- uses: actions/upload-artifact@v1
135+
if: failure()
136+
with:
137+
name: artifacts-${{ matrix.config }}
138+
path: egs/yesno/voc1
139+
105140
integration_vq:
106141
runs-on: ubuntu-20.04
107142
strategy:

egs/yesno/voc1/local/data_prep.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ train_set="train_nodev"
1212
dev_set="dev"
1313
eval_set="eval"
1414
shuffle=false
15+
use_fake_segments=false
1516

1617
# shellcheck disable=SC1091
1718
. utils/parse_options.sh || exit 1;
@@ -31,6 +32,7 @@ if [ $# != 2 ]; then
3132
echo " --dev_set: name of dev set (default=dev)."
3233
echo " --eval_set: name of eval set (default=eval)."
3334
echo " --shuffle: whether to perform shuffle in making dev / eval set (default=false)."
35+
echo " --use_fake_segments: whether to use fake segments (default=false)."
3436
exit 1
3537
fi
3638

@@ -40,18 +42,28 @@ set -euo pipefail
4042

4143
# set filenames
4244
scp="${data_dir}/all/wav.scp"
45+
segments="${data_dir}/all/segments"
4346

4447
# check file existence
4548
[ -e "${scp}" ] && rm "${scp}"
49+
[ -e "${segments}" ] && rm "${segments}"
4650

4751
# make all scp
4852
find "${db_root}" -follow -name "*.wav" | sort | while read -r filename; do
4953
id=$(basename "${filename}" | sed -e "s/\.[^\.]*$//g")
5054
echo "${id} ${filename}" >> "${scp}"
55+
# NOTE(kan-bayashi): for integration test
56+
if "${use_fake_segments}"; then
57+
echo "${id}_1 ${id} 0.0 $(soxi -D "${filename}")" >> "${data_dir}/all/segments"
58+
echo "${id}_2 ${id} 0.0 $(soxi -D "${filename}")" >> "${data_dir}/all/segments"
59+
fi
5160
done
5261

5362
# split
5463
num_all=$(wc -l < "${scp}")
64+
if "${use_fake_segments}"; then
65+
num_all=$(wc -l < "${segments}")
66+
fi
5567
num_deveval=$((num_dev + num_eval))
5668
num_train=$((num_all - num_deveval))
5769
utils/split_data.sh \

egs/yesno/voc1/run.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ conf=conf/parallel_wavegan.v1.debug.yaml
2020
download_dir=downloads # directory to save downloaded files
2121
dumpdir=dump # directory to dump features
2222

23+
# data setting
24+
use_fake_segments=false # for testing
25+
2326
# training related setting
2427
tag="" # tag for directory to save model
2528
resume="" # checkpoint path to resume training
@@ -47,6 +50,7 @@ fi
4750
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
4851
echo "Stage 0: Data preparation"
4952
local/data_prep.sh \
53+
--use_fake_segments "${use_fake_segments}" \
5054
--train_set "${train_set}" \
5155
--dev_set "${dev_set}" \
5256
--eval_set "${eval_set}" \

utils/make_subset_data.sh

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,21 @@ num_split=$2
2222
dst_dir=$3
2323

2424
src_scp=${src_dir}/wav.scp
25+
num_src_utts=$(wc -l < "${src_scp}")
26+
has_utt2spk=false
27+
has_segments=false
28+
2529
if [ -e "${src_dir}/segments" ]; then
2630
has_segments=true
2731
src_segments=${src_dir}/segments
28-
else
29-
has_segments=false
32+
num_src_utts=$(wc -l < "${src_segments}")
3033
fi
34+
3135
if [ -e "${src_dir}/utt2spk" ]; then
3236
has_utt2spk=true
3337
src_utt2spk=${src_dir}/utt2spk
34-
else
35-
has_utt2spk=false
3638
fi
37-
src_scp=${src_dir}/wav.scp
38-
num_src_utts=$(wc -l < "${src_scp}")
3939

40-
# NOTE: We assume that wav.scp and segments have the same number of lines
41-
if ${has_segments}; then
42-
num_src_segments=$(wc -l < "${src_segments}")
43-
if [ "${num_src_segments}" -ne "${num_src_utts}" ]; then
44-
echo "ERROR: wav.scp and segments has different #lines (${num_src_utts} vs ${num_src_segments})." >&2
45-
exit 1;
46-
fi
47-
fi
4840
if ${has_utt2spk}; then
4941
num_src_utt2spk=$(wc -l < "${src_utt2spk}")
5042
if [ "${num_src_utt2spk}" -ne "${num_src_utts}" ]; then

utils/split_data.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ if [ $# -ne 3 ]; then
2828
exit 1
2929
fi
3030

31-
set -eu
31+
set -eux
3232

3333
src_dir=$1
3434
first_dist_dir=$2
@@ -49,7 +49,6 @@ if [ -e "${src_dir}/utt2spk" ]; then
4949
else
5050
has_utt2spk=false
5151
fi
52-
src_scp=${src_dir}/wav.scp
5352

5453
if ${has_utt2spk}; then
5554
num_src_utt2spk=$(wc -l < "${src_utt2spk}")

0 commit comments

Comments
 (0)