Différences
Ci-dessous, les différences entre deux révisions de la page.
| Les deux révisions précédentesRévision précédenteProchaine révision | Révision précédente | ||
| extraire_sous-titres_video [Le 26/08/2015, 20:41] – [Algorithme] albanmartel | extraire_sous-titres_video [Le 22/12/2020, 14:49] (Version actuelle) – [Algorithme] alban.f.j.martel | ||
|---|---|---|---|
| Ligne 1: | Ligne 1: | ||
| + | {{tag> | ||
| + | ====== Extraction des sous-titres d'une vidéo ====== | ||
| + | |||
| + | Scripts bash permettant d'extraire plusieurs pistes de sous-titres vobsub d'une vidéo (enregistrée sur la TNT, par exemple) et de les convertir en sous-titres texte .srt. | ||
| + | |||
| + | |||
| + | < | ||
| + | </ | ||
| + | |||
| + | ===== Algorithme ===== | ||
| + | |||
| + | Inspiré de l' | ||
| + | |||
| + | <note warning> | ||
| + | |||
| + | * Repérer les canaux de sous-titres d'un fichier vidéo avec ffmpeg | ||
| + | * extraire les sous-titres vobsub dans une vidéo Matroska | ||
| + | * extraire les sous-titres des vidéos Matroska avec mkvextract | ||
| + | * convertir les fichiers .sub et .idx en images tiff | ||
| + | * reconnaissance de caractères de chaque tiff avec cuneiform | ||
| + | * création d'un fichier srt | ||
| + | |||
| + | ==== Script bash correspondant à l' | ||
| + | |||
| + | [[https:// | ||
| + | |||
| + | <file bash> | ||
| + | # !/bin/bash | ||
| + | # OUTPUT-COLORING | ||
| + | red=$( tput setaf 1 ) | ||
| + | green=$( tput setaf 2 ) | ||
| + | NC=$( tput sgr0 ) # or perhaps: tput sgr0 | ||
| + | #NC=$( tput setaf 0 ) # or perhaps: tput sgr0 | ||
| + | |||
| + | # Dépendances : ffmpeg, | ||
| + | # Signale quel programme l'on exécute | ||
| + | # puis la composition du répertoire où le script s' | ||
| + | echo -e " | ||
| + | Composition du répertoire courant :\n | ||
| + | $(ls)" | ||
| + | |||
| + | # Invite de commande pour entrer le fichier vidéo à traiter | ||
| + | echo -n " | ||
| + | read film_a_traiter; | ||
| + | |||
| + | # Message pour informer l' | ||
| + | echo -e "Le fichier vidéo choisi est : \n $film_a_traiter" | ||
| + | |||
| + | # Exemple film_a_traiter=" | ||
| + | # film_a_traiter=" | ||
| + | |||
| + | # soustitres_array= (" | ||
| + | soustitres_array=($(ffprobe $film_a_traiter -v quiet -show_entries stream=index: | ||
| + | |||
| + | # metadata_sub=" | ||
| + | metadata_sub=$(for (( c=0; c< | ||
| + | |||
| + | # command1=" | ||
| + | command1=$(echo " | ||
| + | |||
| + | # Execution commande n°1 $command1 | ||
| + | $command1 | ||
| + | |||
| + | # vobsub_piste=" | ||
| + | vobsub_piste=$(for (( c=0; c< | ||
| + | |||
| + | # | ||
| + | command2=$(echo " | ||
| + | |||
| + | # Execution commande n°2 $command2 | ||
| + | $command2 | ||
| + | |||
| + | # Exécution Roc des fichiers de sous-titres | ||
| + | #vobsub2srt 0_; vobsub2srt 1_;" | ||
| + | for (( c=0; c< | ||
| + | do | ||
| + | | ||
| + | done | ||
| + | exit 0; | ||
| + | </ | ||
| + | |||
| + | ==== Ancien Exemple de séquence bash correspondant à l' | ||
| + | |||
| + | <file bash> | ||
| + | ffmpeg -i data0003.ts 2>&1 | grep subtitle | ||
| + | mkdir data0003 | ||
| + | ffmpeg -i data0003.ts -map 0:4 -map 0:5 -vn -an -scodec dvdsub data0003.mkv | ||
| + | mkvextract tracks " | ||
| + | mkvextract tracks " | ||
| + | subp2tiff --sid=0 -n data0003/0_ | ||
| + | subp2tiff --sid=1 -n data0003/1_ | ||
| + | for eachTiff in data0003/ | ||
| + | subptools -s -w -t srt -i data0003/ | ||
| + | subptools -s -w -t srt -i data0003/ | ||
| + | </ | ||
| + | |||
| + | ==== Exemple de conversion de fichier TS en MKV avec FFMPEG ==== | ||
| + | |||
| + | La commande suivante permet d' | ||
| + | |||
| + | |||
| + | 9 pistes : | ||
| + | |||
| + | 0:0 vidéo | ||
| + | |||
| + | 0:1 ne contenant pas de données | ||
| + | |||
| + | 0:2 Audio | ||
| + | |||
| + | 0:3 ne contenant pas de données | ||
| + | |||
| + | 0:5 subtitle | ||
| + | |||
| + | 0:6 subtitle | ||
| + | |||
| + | 0:7 piste non reconnue par ffmpeg | ||
| + | |||
| + | 0:8 piste non reconnue par ffmpeg | ||
| + | |||
| + | < | ||
| + | ffmpeg -threads 4 -i data0003.ts -map 0:0 -map 0:2 -map 0:5 -map 0:6 -acodec copy -vcodec copy -scodec dvdsub output.mkv | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | |||
| + | //-map// pour spécifier toutes les pistes à utiliser | ||
| + | |||
| + | //-acodec// suivi de // | ||
| + | |||
| + | //-vcodec// suivi de //copy// pour le traitement de la piste vidéo | ||
| + | |||
| + | //-scodec// suivi de //dvdsub// pour le traitement des sous-titres | ||
| + | |||
| + | </ | ||
| + | ===== Dépendances à installer ===== | ||
| + | |||
| + | |||
| + | |||
| + | **[[apt> | ||
| + | |||
| + | **[[apt> | ||
| + | |||
| + | **[[apt> | ||
| + | |||
| + | **[[apt> | ||
| + | |||
| + | **[[apt> | ||
| + | |||
| + | |||
| + | ===== Script ts2srt | ||
| + | |||
| + | <note warning> | ||
| + | |||
| + | [[https:// | ||
| + | |||
| + | <file bash> | ||
| + | # ---------------------------------------------------- | ||
| + | # Script'' | ||
| + | # ---------------------------------------------------- | ||
| + | |||
| + | # Par '' | ||
| + | # Courriel : albanmartel(POINT)developpeur(AT)gmail(POINT)com | ||
| + | # Utilisant comme base de travail le script de beguam | ||
| + | # http:// | ||
| + | # License : GNU GPL | ||
| + | # Ce script permet d' | ||
| + | # | ||
| + | # Depends : | ||
| + | # ffmpeg est une collection de logiciels libres destinés au traitement de flux audio ou vidéo | ||
| + | # mkvToolnix (interface graphique pour mkvmerge) est un ensemble d' | ||
| + | # cuneiform - Système de reconnaissance optique de caractères multi-langue | ||
| + | # ogmrip - Application pour extraire et encoder des DVDs | ||
| + | # | ||
| + | # Date : 26/08/2015 | ||
| + | # version : 0.1 | ||
| + | # Mise-à-jour : | ||
| + | # ---------------------------------------------------- | ||
| + | |||
| + | # !/bin/bash | ||
| + | # OUTPUT-COLORING | ||
| + | red=$( tput setaf 1 ) | ||
| + | green=$( tput setaf 2 ) | ||
| + | NC=$( tput sgr0 ) # or perhaps: tput sgr0 | ||
| + | #NC=$( tput setaf 0 ) # or perhaps: tput sgr0 | ||
| + | |||
| + | |||
| + | function readDirectoryPath(){ | ||
| + | echo -n " | ||
| + | read directory; | ||
| + | courant_directory=$(pwd); | ||
| + | if [[ ! -e " | ||
| + | echo " | ||
| + | readDirectoryPath; | ||
| + | fi | ||
| + | cd $directory; | ||
| + | directory=$(pwd); | ||
| + | cd $courant_directory; | ||
| + | echo $directory; | ||
| + | # | ||
| + | } | ||
| + | |||
| + | |||
| + | function presentationOfFileDirectory(){ | ||
| + | message=$("<<" | ||
| + | files=$(ls $directory/ | ||
| + | print " | ||
| + | print " | ||
| + | } | ||
| + | |||
| + | |||
| + | function readVideoExtension(){ | ||
| + | echo -n " | ||
| + | read extension; | ||
| + | testIfAnyFileIsPresent=$(find $directory -maxdepth 1 -iname " | ||
| + | } | ||
| + | |||
| + | |||
| + | function choiseTypeOfVideo(){ | ||
| + | local tmp_videos="" | ||
| + | count=0; | ||
| + | readDirectoryPath; | ||
| + | presentationOfFileDirectory; | ||
| + | testIfAnyFileIsPresent=0; | ||
| + | while [ $testIfAnyFileIsPresent = 0 ] && [ $count != 3 ] ; do | ||
| + | count=$(($count+1)); | ||
| + | readVideoExtension; | ||
| + | done | ||
| + | if [ $count = 3 ] ; then | ||
| + | print " | ||
| + | print " | ||
| + | exit 100; | ||
| + | fi | ||
| + | #/ | ||
| + | #Example : data0001.ts data0002.ts data0003.ts | ||
| + | cd $directory; | ||
| + | videoFiles=($( ls *.$extension )); | ||
| + | cd $courant_directory; | ||
| + | } | ||
| + | |||
| + | |||
| + | function cleanVideoInformations() { | ||
| + | cat $1 | grep Imput > $2; | ||
| + | cat $1 | grep Duration >> $2; | ||
| + | cat $1 | grep Stream >> $2; | ||
| + | rm $1; | ||
| + | } | ||
| + | |||
| + | |||
| + | function prepareCommandToObtainSubtitlesTrackNumer(){ | ||
| + | local a; | ||
| + | #local j=0; | ||
| + | for (( i=0 ; i < ${# | ||
| + | #/ | ||
| + | data_videos_files[i]=$(echo $directory"/" | ||
| + | #/ | ||
| + | tmp_video_info[i]=$(echo "/ | ||
| + | #/ | ||
| + | video_info_file[i]=$(echo / | ||
| + | #ffprobe / | ||
| + | ffprobe ${data_videos_files[i]} 2>&1 | grep ' | ||
| + | #create a cleanning file of video information | ||
| + | cleanVideoInformations " | ||
| + | #cat / | ||
| + | # tracks_Info[1] =Stream #0:5(fra): Subtitle: dvd_subtitle (default) Stream #0:6(ger): Subtitle: dvd_subtitle Stream #0:7(fra): Subtitle: dvd_subtitle | ||
| + | tracks_Info[i]=$(cat ${video_info_file[i]} | grep Subtitle | sed " | ||
| + | rm ${video_info_file[i]}; | ||
| + | done | ||
| + | } | ||
| + | |||
| + | |||
| + | function createDirectoryIfNotExist(){ | ||
| + | if [[ ! -e $1 ]] ; then | ||
| + | mkdir $1; | ||
| + | fi | ||
| + | | ||
| + | } | ||
| + | |||
| + | function ExtractSubtitleFromVideoInMKV(){ | ||
| + | local j=0; | ||
| + | local k=0; | ||
| + | $1 | ||
| + | echo ' | ||
| + | for ((i=0 ; i< ${# | ||
| + | #From : data0001.ts to: data0001 | ||
| + | extract_work_files[i]=$(echo ${videoFiles[i]} | sed " | ||
| + | #echo ${directory}/ | ||
| + | createDirectoryIfNotExist " | ||
| + | tmp=($(echo ${tracks_Info[i]})); | ||
| + | for each in ${tmp[@]}; | ||
| + | # De : 0:4#fra à : 0.4 fra | ||
| + | #0:4 | ||
| + | track_number=$(echo $each | cut -d'#' | ||
| + | #fra | ||
| + | track_lang=$(echo $each | cut -d'#' | ||
| + | #ffmpeg -threads 4 -i / | ||
| + | ffmpeg -threads 4 -i ${directory}/ | ||
| + | mkv_files[k]=$(echo ${directory}/ | ||
| + | mkv_directories[k]=$(echo ${directory}/ | ||
| + | subtitle_sub_id[k]=$(echo ${extract_work_files[i]}\_$j\_$track_lang); | ||
| + | subtitle_lang[k]=$(echo $track_lang); | ||
| + | j=$(($j + 1)); | ||
| + | k=$(($k + 1)) | ||
| + | done | ||
| + | done | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | function OpticalRecognitionCharacterOfTiff(){ | ||
| + | for eachTiff in $1*.tif; do | ||
| + | cuneiform -l $2 -f text -o $eachTiff.txt $eachTiff; | ||
| + | done | ||
| + | |||
| + | } | ||
| + | |||
| + | |||
| + | function convertMKVSubtitleInSRT(){ | ||
| + | local j=0 | ||
| + | for (( i=0 ; i < ${# | ||
| + | #mkvextract tracks / | ||
| + | mkvextract tracks ${mkv_files[i]} -c ISO8859-1 0: | ||
| + | # if sub file existe and has a size equal to 0 than erase sub and idx files | ||
| + | if [ ! -s ${mkv_directories[i]}/ | ||
| + | rm ${mkv_directories[i]}/ | ||
| + | rm ${mkv_directories[i]}/ | ||
| + | fi | ||
| + | done | ||
| + | |||
| + | for (( i=0 ; i < ${# | ||
| + | work_directories=$(echo ${videoFiles[i]} | sed " | ||
| + | # | ||
| + | for each in $directory/ | ||
| + | subtitle_sub=($(echo $each)); | ||
| + | for filesub in ${subtitle_sub[@]}; | ||
| + | # | ||
| + | id_file=$(echo $filesub | sed " | ||
| + | subp2tiff --sid=0 -n $id_file; | ||
| + | #fra | ||
| + | sub_lang=$(echo $filesub | sed " | ||
| + | # | ||
| + | subtitle_file_name=$(echo $filesub | sed " | ||
| + | echo " | ||
| + | # | ||
| + | # | ||
| + | OpticalRecognitionCharacterOfTiff " | ||
| + | #echo $directory/ | ||
| + | # tmp= Le_Secret_des_poignards_Volants_base__fra | ||
| + | tmp=$(echo $filesub | sed " | ||
| + | echo " | ||
| + | subptools -s -w -t srt -i $id_file.xml -o $directory/ | ||
| + | done | ||
| + | rm -rf $directory/ | ||
| + | done | ||
| + | done | ||
| + | } | ||
| + | |||
| + | choiseTypeOfVideo; | ||
| + | prepareCommandToObtainSubtitlesTrackNumer; | ||
| + | ExtractSubtitleFromVideoInMKV | ||
| + | convertMKVSubtitleInSRT; | ||
| + | exit 0; | ||
| + | </ | ||
| + | |||
| + | ===== Contributeurs ===== | ||
| + | [[: | ||
