diff --git a/Parsing/generate_qrinfo.sh b/Parsing/generate_qrinfo.sh
new file mode 100755
index 0000000..af68730
--- /dev/null
+++ b/Parsing/generate_qrinfo.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Generate QR info JSONL data and logs for every *.mkv video of a ReproNim
+# session by running parse_wQR.py on an audio-stripped copy of each video.
+#
+# Usage: generate_qrinfo.sh <session_dir>
+#   input : <session_dir>/reprostim-videos/*.mkv
+#   output: <session_dir>/timing-reprostim-videos/<name>.qrinfo.jsonl|.log
+
+if [ -z "$1" ]; then
+  echo "Script to generate QR info JSONL data and logs for ReproNim session with parse_wQR.py tool"
+  echo "Usage: $0 <session_dir>"
+  exit 1
+fi
+
+# Locate parse_wQR.py next to this script so the tool is found regardless
+# of the caller's working directory
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Set SESSION_DIR to the first command-line argument
+SESSION_DIR=$1
+IN_DIR=$SESSION_DIR/reprostim-videos
+OUT_DIR=$SESSION_DIR/timing-reprostim-videos
+LOG_LEVEL=DEBUG
+
+echo "Generating QR info reprostim videos in session: $SESSION_DIR"
+echo "Session reprostim video directory: $IN_DIR"
+echo "QR info and logs will be saved to: $OUT_DIR"
+
+# Create the out directory if it does not exist
+if [ ! -d "$OUT_DIR" ]; then
+  mkdir -p "$OUT_DIR"
+  echo "Created directory: $OUT_DIR"
+fi
+
+# Collect the .mkv files. nullglob makes an unmatched pattern expand to
+# nothing, so a directory without videos yields zero iterations instead of
+# one iteration over the literal "*.mkv" pattern
+shopt -s nullglob
+mkv_files=("$IN_DIR"/*.mkv)
+shopt -u nullglob
+total_files=${#mkv_files[@]}
+echo "Total *.mkv files count: $total_files"
+counter=0
+
+# Iterate over .mkv files in IN_DIR
+for file in "${mkv_files[@]}"; do
+  counter=$((counter + 1))
+  base_name=$(basename "$file" .mkv)
+  echo "Processing $counter/$total_files : $file..."
+  # this is normal video parsing:
+  #./parse_wQR.py --log-level $LOG_LEVEL $file >$OUT_DIR/$base_name.qrinfo.jsonl 2>$OUT_DIR/$base_name.qrinfo.log
+
+  # but we have invalid videos, so clean them up first: remux without the
+  # audio stream into a temporary file and parse that copy
+  tmp_mkv_file=$OUT_DIR/$base_name.mkv
+  echo "Generating tmp *.mkv file $tmp_mkv_file..."
+  # -nostdin: never wait for interactive input; -y: overwrite a stale tmp file
+  if ffmpeg -nostdin -y -i "$file" -an -c copy "$tmp_mkv_file"; then
+    "$SCRIPT_DIR"/parse_wQR.py --log-level "$LOG_LEVEL" "$tmp_mkv_file" >"$OUT_DIR/$base_name.qrinfo.jsonl" 2>"$OUT_DIR/$base_name.qrinfo.log"
+  else
+    echo "ffmpeg failed for $file, skipping QR parsing" >&2
+  fi
+  if [ -e "$tmp_mkv_file" ]; then
+    echo "Deleting tmp *.mkv file: $tmp_mkv_file"
+    rm "$tmp_mkv_file"
+  fi
+done
+
+# Generate QR info
+#echo "Generating QR info data..."
+#./parse_wQR.py --log-level $LOG_LEVEL $SESSION_DIR >$OUT_DIR/dump_qrinfo.jsonl 2>$OUT_DIR/dump_qrinfo.log
+#echo "dump_qrinfo.py exit code: $?"
diff --git a/Parsing/parse_wQR.py b/Parsing/parse_wQR.py
index 11ea614..062512a 100755
--- a/Parsing/parse_wQR.py
+++ b/Parsing/parse_wQR.py
@@ -5,6 +5,7 @@
 import os
 import re
 from datetime import datetime, timedelta
+from pathlib import Path
 from re import match
 from typing import Optional
 
@@ -22,6 +23,17 @@
 logging.getLogger().addHandler(logging.StreamHandler(sys.stderr))
 logger.debug(f"name={__name__}")
 
+# Define class video info details
+class InfoSummary(BaseModel):
+    """Per-file video summary emitted in INFO mode (see do_info_file)."""
+
+    path: Optional[str] = Field(None, description="Video file path")
+    rate_mbpm: Optional[float] = Field(0.0, description="Video file 'byterate' "
+                                       "in MB per minute.")
+    duration_sec: Optional[float] = Field(0.0, description="Duration of the video "
+                                          "in seconds")
+    size_mb: Optional[float] = Field(0.0, description="Video file size in MB.")
+
 
 # Define class for video time info
 class VideoTimeInfo(BaseModel):
@@ -175,12 +187,66 @@ def finalize_record(ps: ParseSummary,
     return record
 
 
+def do_info_file(path: str) -> Optional[InfoSummary]:
+    """Collect duration, size and byterate of a single video file.
+
+    :param path: Path to the video file.
+    :return: Populated InfoSummary, or None when get_video_time_info()
+             reports a failure for this path (the error is logged).
+    """
+    logger.info(f"do_info_file({path})")
+    vti: VideoTimeInfo = get_video_time_info(path)
+    if not vti.success:
+        logger.error(f"Failed parse file name time pattern, error: {vti.error}")
+        return None
+    o: InfoSummary = InfoSummary()
+    o.path = path
+    o.duration_sec = round(vti.duration_sec, 1)
+    size: int = os.path.getsize(path)
+    # Sizes are reported in decimal megabytes (1 MB = 1000*1000 bytes)
+    o.size_mb = round(size/(1000*1000), 1)
+    # Leave rate_mbpm at its 0.0 default for (near) zero duration to avoid
+    # dividing by zero
+    if o.duration_sec>0.0001:
+        o.rate_mbpm = round(size*60/(o.duration_sec*1000*1000), 1)
+    return o
+
+
+def do_info(path: str):
+    """Yield an InfoSummary for a video file or for each *.mkv in a tree.
+
+    :param path: Video file, or directory that is walked recursively for
+                 files whose name ends with '.mkv'.
+    :return: Generator of InfoSummary objects. Files for which
+             do_info_file() returns None are skipped, so consumers can
+             call methods on every item without a None check.
+    """
+    p = Path(path)
+    if p.is_file():
+        info = do_info_file(path)
+        if info is not None:
+            yield info
+    elif p.is_dir():
+        logger.info(f"Processing video directory: {path}")
+        for root, _, files in os.walk(path):
+            for file in files:
+                if not file.endswith('.mkv'):
+                    continue
+                info = do_info_file(os.path.join(root, file))
+                if info is not None:
+                    yield info
+            # Uncomment to visit only top-level dir
+            # break
+    else:
+        logger.error(f"Path not found: {path}")
+
+
 def do_parse(path_video: str):
     ps: ParseSummary = ParseSummary()
 
     vti: VideoTimeInfo = get_video_time_info(path_video)
     if not vti.success:
-        logger.error(f"Failed parse file name time patter, error: {vti.error}")
+        logger.error(f"Failed parse file name time pattern, error: {vti.error}")
         return
 
     logger.info(f"Video start time : {vti.start_time}")
@@ -292,13 +358,21 @@ def do_parse(path_video: str):
 @click.command(help='Utility to parse video and locate integrated '
                     'QR time codes.')
 @click.argument('path', type=click.Path(exists=True))
+@click.option('--mode', default='PARSE',
+              type=click.Choice(['PARSE', 'INFO']),
+              help='Specify execution mode. Default is "PARSE", '
+                   'normal execution. '
+                   'Use "INFO" to dump video file info like duration, '
+                   'bitrate, file size etc, (in this case '
+                   '"path" argument specifies video file or directory '
+                   'containing video files).')
 @click.option('--log-level', default='INFO',
               type=click.Choice(['DEBUG', 'INFO',
                                  'WARNING', 'ERROR',
                                  'CRITICAL']),
               help='Set the logging level')
 @click.pass_context
-def main(ctx, path: str, log_level):
+def main(ctx, path: str, mode: str, log_level):
     logger.setLevel(log_level)
     logger.debug("parse_wQR.py tool")
     logger.debug(f"Working dir : {os.getcwd()}")
@@ -308,8 +382,14 @@ def main(ctx, path: str, log_level):
         logger.error(f"Path does not exist: {path}")
         return 1
 
-    for item in do_parse(path):
-        print(item.json())
+    if mode=="PARSE":
+        for item in do_parse(path):
+            print(item.model_dump_json())
+    elif mode=="INFO":
+        for item in do_info(path):
+            print(item.model_dump_json())
+    else:
+        logger.error(f"Unknown mode: {mode}")
     return 0
 
 