refactor(video2x): split the CLI into multiple files; improve CLI args validation

Signed-off-by: k4yt3x <i@k4yt3x.com>
k4yt3x · Dec 4, 2024 · 549b038 · 549b038
1 parent adf3baf
commit 549b038
Show file tree

Hide file tree

Showing 31 changed files with 928 additions and 834 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Improve error handling and error messages.
 - Improve the CLI help message structure and clarity.
+- Improve CLI argument validation.
 
 ### Removed
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -332,7 +332,6 @@ if(BUILD_VIDEO2X_CLI)
         ${ALL_INCLUDE_DIRS}
         ${CMAKE_CURRENT_BINARY_DIR}
         ${PROJECT_SOURCE_DIR}/include
-        ${PROJECT_SOURCE_DIR}/include/libvideo2x
         ${PROJECT_SOURCE_DIR}/tools/video2x/include
     )
 

diff --git a/README.md b/README.md
@@ -1,5 +1,6 @@
 <p align="center">
-   <img src="https://github.com/user-attachments/assets/5cd63373-e806-474f-94ec-6e04963bf90f"/>
+   <img src="https://github.com/user-attachments/assets/5cd63373-e806-474f-94ec-6e04963bf90f"
+        alt="Video2X: A machine learning-based video super resolution and frame interpolation framework."/>
    </br>
    <img src="https://img.shields.io/github/v/release/k4yt3x/video2x?style=flat-square"/>
    <img src="https://img.shields.io/github/downloads/k4yt3x/video2x/total?style=flat-square"/>
@@ -69,15 +70,7 @@ Join our Telegram discussion group to ask any questions you have about Video2X,
 
 Comprehensive documentation for Video2X is available at [https://docs.video2x.org/](https://docs.video2x.org/). It offers detailed instructions on how to [build](https://docs.video2x.org/building/index.html), [install](https://docs.video2x.org/installing/index.html), [use](https://docs.video2x.org/running/index.html), and [develop](https://docs.video2x.org/developing/index.html) with this program.
 
-## 🔰 Introduction
-
-Video2X is a machine-learning-powered framework for video upscaling and frame interpolation, built around three main components:
-
-- [libvideo2x](https://github.com/k4yt3x/video2x/blob/master/src/libvideo2x.cpp): The core C++ library providing upscaling and frame interpolation capabilities.
-- [Video2X CLI](https://github.com/k4yt3x/video2x/blob/master/src/video2x.c): A command-line interface that utilizes `libvideo2x` for video processing.
-- [Video2X Qt6](https://github.com/k4yt3x/video2x-qt6): A Qt6-based graphical interface that utilizes `libvideo2x` for video processing.
-
-### Video Demos
+## 📽️ Video Demos (Outdated)
 
 ![Spirited Away Demo](https://user-images.githubusercontent.com/21986859/49412428-65083280-f73a-11e8-8237-bb34158a545e.png)\
 _Upscale demo: Spirited Away's movie trailer_

diff --git a/include/libvideo2x/avutils.h b/include/libvideo2x/avutils.h
@@ -1,5 +1,4 @@
-#ifndef AVUTILS_H
-#define AVUTILS_H
+#pragma once
 
 extern "C" {
 #include <libavformat/avformat.h>
@@ -18,5 +17,3 @@ void av_bufferref_deleter(AVBufferRef *bufferref);
 void av_frame_deleter(AVFrame *frame);
 
 void av_packet_deleter(AVPacket *packet);
-
-#endif  // AVUTILS_H
diff --git a/include/libvideo2x/conversions.h b/include/libvideo2x/conversions.h
@@ -1,5 +1,4 @@
-#ifndef CONVERSIONS_H
-#define CONVERSIONS_H
+#pragma once
 
 extern "C" {
 #include <libavutil/frame.h>
@@ -16,5 +15,3 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame);
 
 // Convert ncnn::Mat to AVFrame
 AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt);
-
-#endif  // CONVERSIONS_H
diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h
@@ -1,5 +1,4 @@
-#ifndef DECODER_H
-#define DECODER_H
+#pragma once
 
 #include <filesystem>
 
@@ -27,5 +26,3 @@ class Decoder {
     AVCodecContext *dec_ctx_;
     int in_vstream_idx_;
 };
-
-#endif  // DECODER_H
diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h
@@ -1,5 +1,4 @@
-#ifndef ENCODER_H
-#define ENCODER_H
+#pragma once
 
 #include <cstdint>
 #include <filesystem>
@@ -16,34 +15,32 @@ extern "C" {
 // Encoder configurations
 struct EncoderConfig {
     // Non-AVCodecContext options
-    AVCodecID codec;
-    bool copy_streams;
+    AVCodecID codec = AV_CODEC_ID_NONE;
+    bool copy_streams = true;
 
     // Basic video options
-    int width;
-    int height;
-    int frm_rate_mul;
-    AVPixelFormat pix_fmt;
+    int frm_rate_mul = 0;
+    AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
 
     // Rate control and compression
-    int64_t bit_rate;
-    int rc_buffer_size;
-    int rc_min_rate;
-    int rc_max_rate;
-    int qmin;
-    int qmax;
+    int64_t bit_rate = 0;
+    int rc_buffer_size = 0;
+    int rc_min_rate = 0;
+    int rc_max_rate = 0;
+    int qmin = -1;
+    int qmax = -1;
 
     // GOP and frame structure
-    int gop_size;
-    int max_b_frames;
-    int keyint_min;
-    int refs;
+    int gop_size = -1;
+    int max_b_frames = -1;
+    int keyint_min = -1;
+    int refs = -1;
 
     // Performance and threading
-    int thread_count;
+    int thread_count = 0;
 
     // Latency and buffering
-    int delay;
+    int delay = -1;
 
     // Extra AVOptions
     std::vector<std::pair<StringType, StringType>> extra_opts;
@@ -60,6 +57,8 @@ class Encoder {
         AVFormatContext *ifmt_ctx,
         AVCodecContext *dec_ctx,
         EncoderConfig &enc_cfg,
+        int width,
+        int height,
         int in_vstream_idx
     );
 
@@ -77,5 +76,3 @@ class Encoder {
     int out_vstream_idx_;
     int *stream_map_;
 };
-
-#endif  // ENCODER_H
diff --git a/include/libvideo2x/filter_libplacebo.h b/include/libvideo2x/filter_libplacebo.h
@@ -1,5 +1,4 @@
-#ifndef FILTER_LIBPLACEBO_H
-#define FILTER_LIBPLACEBO_H
+#pragma once
 
 #include <filesystem>
 
@@ -57,5 +56,3 @@ class FilterLibplacebo : public Filter {
     AVRational in_time_base_;
     AVRational out_time_base_;
 };
-
-#endif  // FILTER_LIBPLACEBO_H
diff --git a/include/libvideo2x/filter_realesrgan.h b/include/libvideo2x/filter_realesrgan.h
@@ -1,5 +1,4 @@
-#ifndef FILTER_REALESRGAN_H
-#define FILTER_REALESRGAN_H
+#pragma once
 
 extern "C" {
 #include <libavcodec/avcodec.h>
@@ -50,5 +49,3 @@ class FilterRealesrgan : public Filter {
     AVRational out_time_base_;
     AVPixelFormat out_pix_fmt_;
 };
-
-#endif  // FILTER_REALESRGAN_H
diff --git a/include/libvideo2x/fsutils.h b/include/libvideo2x/fsutils.h
@@ -1,5 +1,4 @@
-#ifndef FSUTILS_H
-#define FSUTILS_H
+#pragma once
 
 #include <filesystem>
 #include <string>
@@ -29,5 +28,3 @@ std::string wstring_to_u8string(const StringType &wstr);
 StringType path_to_string_type(const std::filesystem::path &path);
 
 StringType to_string_type(int value);
-
-#endif  // FSUTILS_H
diff --git a/include/libvideo2x/interpolator_rife.h b/include/libvideo2x/interpolator_rife.h
@@ -1,5 +1,4 @@
-#ifndef INTERPOLATOR_RIFE_H
-#define INTERPOLATOR_RIFE_H
+#pragma once
 
 extern "C" {
 #include <libavcodec/avcodec.h>
@@ -55,5 +54,3 @@ class InterpolatorRIFE : public Interpolator {
     AVRational out_time_base_;
     AVPixelFormat out_pix_fmt_;
 };
-
-#endif  // INTERPOLATOR_RIFE_H
diff --git a/include/libvideo2x/libplacebo.h b/include/libvideo2x/libplacebo.h
@@ -1,5 +1,4 @@
-#ifndef PLACEBO_H
-#define PLACEBO_H
+#pragma once
 
 #include <filesystem>
 
@@ -18,5 +17,3 @@ int init_libplacebo(
     uint32_t vk_device_index,
     const std::filesystem::path &shader_path
 );
-
-#endif  // PLACEBO_H
diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h
@@ -1,5 +1,4 @@
-#ifndef LIBVIDEO2X_H
-#define LIBVIDEO2X_H
+#pragma once
 
 #include <atomic>
 #include <cstdint>
@@ -26,19 +25,15 @@ extern "C" {
 #define LIBVIDEO2X_API
 #endif
 
-struct HardwareConfig {
-    uint32_t vk_device_index;
-    AVHWDeviceType hw_device_type;
-};
-
 class LIBVIDEO2X_API VideoProcessor {
    public:
     VideoProcessor(
-        const HardwareConfig hw_cfg,
         const ProcessorConfig proc_cfg,
-        EncoderConfig enc_cfg,
-        Video2xLogLevel = Video2xLogLevel::Info,
-        bool benchmark = false
+        const EncoderConfig enc_cfg,
+        const uint32_t vk_device_index = 0,
+        const AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE,
+        const Video2xLogLevel = Video2xLogLevel::Info,
+        const bool benchmark = false
     );
 
     virtual ~VideoProcessor() = default;
@@ -85,9 +80,10 @@ class LIBVIDEO2X_API VideoProcessor {
         AVFrame *proc_frame
     );
 
-    HardwareConfig hw_cfg_;
     ProcessorConfig proc_cfg_;
     EncoderConfig enc_cfg_;
+    uint32_t vk_device_index_ = 0;
+    AVHWDeviceType hw_device_type_ = AV_HWDEVICE_TYPE_NONE;
     bool benchmark_ = false;
 
     std::atomic<int64_t> frame_index_ = 0;
@@ -96,5 +92,3 @@ class LIBVIDEO2X_API VideoProcessor {
     std::atomic<bool> aborted_ = false;
     std::atomic<bool> completed_ = false;
 };
-
-#endif  // LIBVIDEO2X_H
diff --git a/include/libvideo2x/logging.h b/include/libvideo2x/logging.h
@@ -1,5 +1,4 @@
-#ifndef LOGGING_H
-#define LOGGING_H
+#pragma once
 
 #include <optional>
 
@@ -19,5 +18,3 @@ enum class Video2xLogLevel {
 void set_log_level(Video2xLogLevel log_level);
 
 std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
-
-#endif  // LOGGING_H
diff --git a/include/libvideo2x/processor.h b/include/libvideo2x/processor.h
@@ -1,5 +1,4 @@
-#ifndef PROCESSOR_H
-#define PROCESSOR_H
+#pragma once
 
 #include <variant>
 #include <vector>
@@ -18,6 +17,7 @@ enum class ProcessingMode {
 };
 
 enum class ProcessorType {
+    None,
     Libplacebo,
     RealESRGAN,
     RIFE,
@@ -28,26 +28,26 @@ struct LibplaceboConfig {
 };
 
 struct RealESRGANConfig {
-    bool tta_mode;
+    bool tta_mode = false;
     StringType model_name;
 };
 
 struct RIFEConfig {
-    bool tta_mode;
-    bool tta_temporal_mode;
-    bool uhd_mode;
-    int num_threads;
+    bool tta_mode = false;
+    bool tta_temporal_mode = false;
+    bool uhd_mode = false;
+    int num_threads = 0;
     StringType model_name;
 };
 
 // Unified filter configuration
 struct ProcessorConfig {
-    ProcessorType processor_type;
-    int width;
-    int height;
-    int scaling_factor;
-    int frm_rate_mul;
-    float scn_det_thresh;
+    ProcessorType processor_type = ProcessorType::None;
+    int width = 0;
+    int height = 0;
+    int scaling_factor = 0;
+    int frm_rate_mul = 0;
+    float scn_det_thresh = 0.0f;
     std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
 };
 
@@ -81,5 +81,3 @@ class Interpolator : public Processor {
     virtual int
     interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0;
 };
-
-#endif  // PROCESSOR_H
diff --git a/include/libvideo2x/processor_factory.h b/include/libvideo2x/processor_factory.h
@@ -1,5 +1,4 @@
-#ifndef PROCESSOR_FACTORY_H
-#define PROCESSOR_FACTORY_H
+#pragma once
 
 #include <functional>
 #include <memory>
@@ -32,5 +31,3 @@ class ProcessorFactory {
     // Static initializer for default processors
     static void init_default_processors(ProcessorFactory &factory);
 };
-
-#endif  // PROCESSOR_FACTORY_H
diff --git a/include/libvideo2x/version.h.in b/include/libvideo2x/version.h.in
@@ -1,6 +1,3 @@
-#ifndef VERSION_H
-#define VERSION_H
+#pragma once
 
 #define LIBVIDEO2X_VERSION_STRING "@PROJECT_VERSION@"
-
-#endif  // VERSION_H
diff --git a/src/avutils.cpp b/src/avutils.cpp
@@ -169,9 +169,8 @@ float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
         uint8_t *ptr1 = rgb_frame1->data[0] + y * rgb_frame1->linesize[0];
         uint8_t *ptr2 = rgb_frame2->data[0] + y * rgb_frame2->linesize[0];
         for (int x = 0; x < width * 3; x++) {
-            sum_diff += static_cast<uint64_t>(
-                std::abs(static_cast<int>(ptr1[x]) - static_cast<int>(ptr2[x]))
-            );
+            sum_diff +=
+                static_cast<uint64_t>(ptr1[x] > ptr2[x] ? ptr1[x] - ptr2[x] : ptr2[x] - ptr1[x]);
             max_diff += 255;
         }
     }

diff --git a/src/encoder.cpp b/src/encoder.cpp
@@ -33,6 +33,8 @@ int Encoder::init(
     AVFormatContext *ifmt_ctx,
     AVCodecContext *dec_ctx,
     EncoderConfig &enc_cfg,
+    int width,
+    int height,
     int in_vstream_idx
 ) {
     int ret;
@@ -84,8 +86,8 @@ int Encoder::init(
     enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
 
     // Set basic video options
-    enc_ctx_->width = enc_cfg.width;
-    enc_ctx_->height = enc_cfg.height;
+    enc_ctx_->width = width;
+    enc_ctx_->height = height;
 
     // Set rate control and compression options
     enc_ctx_->bit_rate = enc_cfg.bit_rate;