# Train a TTS voice
# Script to split an audio file into one clip per subtitle cue
# Positional arguments:
#   $1 - subtitle file whose "start --> end" lines define the clip boundaries
#   $2 - audio file the clips are cut from
# Abort with a usage message when either one is missing, instead of letting
# the later commands run with an empty file name.
srt_file=${1:?usage: $0 SRT_FILE AUDIO_FILE}
audio_file=${2:?usage: $0 SRT_FILE AUDIO_FILE}
# Uncomment to start from scratch (removes earlier clips and the transcript):
#rm -Rf wav _data.csv
mkdir -p wav
#######################################
# Convert an "H:M:S" timestamp to a number of seconds.
# Arguments:
#   $1 - timestamp such as 01:02:03, 00:01:02,500 or 00:01:02.500
# Outputs:
#   Seconds on stdout. A fractional part, if present, is kept and always
#   written with a '.' separator (e.g. 62.500).
#######################################
function convert_to_second {
  local h m s frac=""
  IFS=: read -r h m s <<<"$1"
  # Split off the fraction: shell arithmetic is integer-only.
  case $s in
    *[.,]*)
      frac=".${s#*[.,]}"
      s=${s%%[.,]*}
      ;;
  esac
  # 10# forces base 10; otherwise zero-padded fields such as "08" and "09"
  # are read as (invalid) octal numbers. Missing fields count as 0.
  echo "$(((10#${h:-0} * 60 + 10#${m:-0}) * 60 + 10#${s:-0}))$frac"
}
#######################################
# Cut one time range out of an audio file with ffmpeg, resampling the
# result to 22050 Hz.
# Arguments:
#   $1 - start position (passed to ffmpeg -ss)
#   $2 - end position (passed to ffmpeg -to)
#   $3 - input audio file
#   $4 - output file
# Returns:
#   The exit status of ffmpeg.
#######################################
function cut_part_from_file {
  local from=$1
  local to=$2
  local input=$3
  local output=$4
  # Every expansion is quoted so that paths containing spaces or glob
  # characters reach ffmpeg as single, unmodified arguments.
  ffmpeg -ss "$from" -to "$to" -i "$input" -ar 22050 "$output" -hide_banner -loglevel error
}
# Return 0 when string $2 contains $1 as a literal substring, 1 otherwise.
# The needle is quoted inside the pattern so that characters such as
# '*', '?' and '[' are matched literally instead of acting as wildcards.
# An empty needle is contained in every string.
stringContain() {
  case $2 in
    *"$1"*) return 0 ;;
    *) return 1 ;;
  esac
}
# Walk through the subtitle file. Every line containing "-->" starts a new
# clip: the counter is incremented, the start/end times are extracted and the
# matching part of the audio is written to wav/<counter>.wav. The first
# non-empty line after such a line is appended to _data.csv as
# "<counter>|<text>".
is_line_after_time="false"
counter=0
# Kept as a global setting: unquoted expansions elsewhere in the script then
# split on newlines only, so file names with spaces survive.
IFS=$'\n'
# The file is read on fd 3 so that ffmpeg keeps the script's own stdin.
# Reading with `read -r` (instead of word-splitting `$(cat file)`) keeps
# subtitle text containing '*' or '?' from being expanded into file names.
# The `|| [[ -n $line ]]` part handles a last line without trailing newline.
while IFS= read -r line <&3 || [[ -n $line ]]; do
  # Subtitle files often use CRLF line endings; drop the carriage return so
  # it does not end up in the end time or in the transcript.
  line=${line%$'\r'}
  if [[ -z $line ]]; then
    continue
  fi
  printf '%s\n' "$line"

  if [[ $is_line_after_time == "true" ]]; then
    is_line_after_time="false"
    printf '%s|%s\n' "$counter" "$line" >> _data.csv
  fi

  if [[ $line == *"-->"* ]]; then
    printf '%s\n' "$line"
    counter=$((counter + 1))
    is_line_after_time="true"
    # "00:00:01,000 --> 00:00:02,500": take the text on each side of the
    # arrow and switch the decimal comma to a dot.
    start_time=${line%% --> *}
    start_time=${start_time//,/.}
    end_time=${line#* --> }
    end_time=${end_time%% --> *}
    end_time=${end_time//,/.}
    printf '%s  to  %s\n' "$start_time" "$end_time"
    cut_part_from_file "$start_time" "$end_time" "$audio_file" "wav/$counter.wav"
  fi
done 3< "$srt_file"
# Training: set up Piper and continue training from an existing checkpoint
# Create a Python 3.10 virtual environment named env-piper and activate it.
python3.10 -m venv env-piper
# NOTE(review): activate.fish looks like the fish-shell activation script;
# other shells would presumably source env-piper/bin/activate — confirm
# which shell these commands are meant to be run in.
source env-piper/bin/activate.fish
pip install wheel setuptools
# Fetch Piper and install its Python package in editable mode from
# piper/src/python/ (all following commands run from that directory).
git clone https://github.com/rhasspy/piper.git
cd piper/src/python/
pip install -e .
./build_monotonic_align.sh
# NOTE(review): torchmetrics is pinned to 0.11.4, presumably for
# compatibility with piper_train — confirm before changing the version.
pip install torchmetrics==0.11.4
# Preprocess the dataset: French, 22050 Hz, ljspeech format, single speaker.
# The input and output directories are machine-specific absolute paths.
python3 -m piper_train.preprocess --language fr --sample-rate 22050 --dataset-format ljspeech --single-speaker --input-dir /home/tjiho/info/ia/input/ --output-dir /home/tjiho/info/ia/output/
# Sets max_split_size_mb:256 in PYTORCH_CUDA_ALLOC_CONF for the training run.
export 'PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:256'
# Run training on the preprocessed dataset directory with one GPU device,
# passing an existing checkpoint through --resume_from_checkpoint.
# Do not insert comments between the continuation lines below: each line
# ends with a backslash and a comment would break the command.
python3 -m piper_train \
--dataset-dir /home/tjiho/info/ia/output/ \
--accelerator 'gpu' \
--devices 1 \
--batch-size 32 \
--validation-split 0.0 \
--num-test-examples 0 \
--max_epochs 5000 \
--checkpoint-epochs 1 \
--precision 32 \
--resume_from_checkpoint /home/tjiho/info/ia/base-siwis/epoch=3304-step=2050940.ckpt