Added compressor "FLOAT" (cast to IEEE-754 single precision floating point)
a-ma72 · Dec 20, 2021 · 75a42e4 · 75a42e4
1 parent 57e2657
commit 75a42e4
Show file tree

Hide file tree

Showing 4 changed files with 128 additions and 3 deletions.
diff --git a/Changelog.txt b/Changelog.txt
@@ -4,6 +4,7 @@
   methods QLIN16 and QLOG16 have no effect (data stored uncompressed).
 - SQLite update to version 3.37.0
 - Supporting SQLITE_ENABLE_MATH_FUNCTIONS
+- Added compressor "FLOAT" (cast to IEEE-754 single precision floating point)
 
 Version 2.11 (2. Feb. 2021)
 - Added built-in functions uuid(), uuid_str(X), uuid_blob(X) from uuid.c (sqlite.org)

diff --git a/mksqlite.cpp b/mksqlite.cpp
@@ -1299,6 +1299,11 @@ class Mksqlite
                 new_compression_type = QLOG16_ID;
                 new_compression_level = ( new_compression_level > 0 ); // only 0 or 1
             } 
+            else if( STRMATCH( new_compressor, FLOAT_ID ) )
+            {
+                new_compression_type = FLOAT_ID;
+                new_compression_level = ( new_compression_level > 0 ); // only 0 or 1
+            } 
             else 
             {
                 m_err.set( MSG_INVALIDARG );

diff --git a/number_compressor.hpp b/number_compressor.hpp
@@ -53,6 +53,7 @@ extern "C"
 #endif
 #define QLIN16_ID               "QLIN16"
 #define QLOG16_ID               "QLOG16"
+#define FLOAT_ID                "FLOAT"
 /** @} */
 
 /// Which compression method is to use, if its name is empty
@@ -70,6 +71,7 @@ class NumberCompressor
         CT_BLOSC,         ///< using BLOSC compressor (lossless)
         CT_QLIN16,        ///< using linear quantization (lossy)
         CT_QLOG16,        ///< using logarithmic quantization (lossy)
+        CT_FLOAT,         ///< using 4 byte single precision floating points (IEEE-754, lossy)
     } compressor_type_e;
 
     bool                    m_result_is_const;        ///< true, if result is const type
@@ -213,6 +215,10 @@ class NumberCompressor
         {
             eCompressorType = CT_NONE;
         }        
+        else if( 0 == _strcmpi( strCompressorType, FLOAT_ID ) )
+        {
+            eCompressorType = CT_FLOAT;
+        }
         else if( 0 == _strcmpi( strCompressorType, QLIN16_ID ) )
         {
             eCompressorType = CT_QLIN16;
@@ -303,7 +309,7 @@ class NumberCompressor
     /// Returns true, if current compressor modifies value data,
     /// i.e. \p m_eCompressorType selects one of the lossy methods
     /// (CT_QLIN16, CT_QLOG16 and, as of this change, CT_FLOAT)
     bool isLossy()
     {
-        return m_eCompressorType == CT_QLIN16 || m_eCompressorType == CT_QLOG16;
+        return m_eCompressorType == CT_QLIN16 || m_eCompressorType == CT_QLOG16 || m_eCompressorType == CT_FLOAT;
     }
 
 
@@ -340,6 +346,13 @@ class NumberCompressor
             status = bloscCompress();
             break;
 
+          case CT_FLOAT:
+#if MKSQLITE_CONFIG_USE_LOGGING
+            log_trace( "FLOAT compress %ld elements", (long)m_rdata_size );
+#endif
+            status = floatCompress();
+            break;
+
           case CT_QLIN16:
 #if MKSQLITE_CONFIG_USE_LOGGING
             log_trace( "QLIN16 compress %ld elements", (long)m_rdata_size );
@@ -404,6 +417,13 @@ class NumberCompressor
             status = bloscDecompress();
             break;
 
+          case CT_FLOAT:
+#if MKSQLITE_CONFIG_USE_LOGGING
+            log_trace( "FLOAT uncompress %ld elements", (long)m_rdata_size );
+#endif
+            status = floatDecompress();
+            break;
+
           case CT_QLIN16:
 #if MKSQLITE_CONFIG_USE_LOGGING
             log_trace( "QLIN16 uncompress %ld elements", (long)m_rdata_size );
@@ -515,6 +535,89 @@ class NumberCompressor
     }
 
 
+    /**
+     * \brief Lossy compression by narrowing doubles to IEEE-754 single precision
+     *
+     * \returns true on success; false if the raw data is not flagged as double
+     *          type (MSG_ERRCOMPRARG) or if the allocation failed (MSG_ERRMEMORY)
+     *
+     * Allocates \p m_cdata and fills it with one float per double element
+     * read from \p m_rdata. NaN, +Inf and -Inf are allowed.
+     */
+    bool floatCompress()
+    {
+        assert( m_rdata && !m_cdata && 
+                m_rdata_element_size == sizeof( double ) && 
+                m_rdata_size % m_rdata_element_size == 0 );
+
+        // only data flagged as double type can be converted
+        if( !m_rdata_is_double_type )
+        {
+            m_err.set( MSG_ERRCOMPRARG );
+            return false;
+        }
+
+        const double* const  source = (const double*)m_rdata;
+        const size_t         count  = m_rdata_size / sizeof( double );
+
+        // the result holds exactly one float per source element
+        m_cdata_size = count * sizeof( float );
+        m_cdata      = m_Allocator( m_cdata_size );
+
+        if( !m_cdata )
+        {
+            m_err.set( MSG_ERRMEMORY );
+            return false;
+        }
+
+        float* const  target = (float*)m_cdata;
+
+        // narrow element by element
+        for( size_t idx = 0; idx < count; ++idx )
+        {
+            target[idx] = (float)source[idx];
+        }
+
+        return true;
+    }
+
+
+    /**
+     * \brief Decompression of data stored as IEEE-754 single precision floating points
+     *
+     * \returns true on success, false (MSG_ERRCOMPRARG) if the target data
+     *          is not flagged as double type
+     * 
+     * Uncompress compressed data \p m_cdata to data \p m_rdata by widening
+     * each stored float to a double.
+     * \p m_rdata must point to writable storage space and
+     * \p m_rdata_size must specify the legal space.
+     * (The compression is lossy: precision dropped by the cast to float
+     * is not restored.)
+     */
+    bool floatDecompress()
+    {
+        assert( m_rdata && m_cdata && 
+                m_rdata_element_size == sizeof( double ) && 
+                m_rdata_size % m_rdata_element_size == 0 );
+
+        double*   rdata = (double*)m_rdata;
+        size_t    cntElements = m_rdata_size / sizeof(*rdata);
+        float*    pFloatData = (float*)m_cdata;
+
+        // compressor works for double type only; reject anything else.
+        // (The test must be negated, same as in floatCompress(): without the
+        // negation the only supported case would be refused.)
+        if( !m_rdata_is_double_type )
+        {
+            m_err.set( MSG_ERRCOMPRARG );
+            return false;
+        }
+
+        // type cast: widen each stored float back to double
+        for( size_t i = 0; i < cntElements; i++ )
+        {
+            *rdata++ = (double)*pFloatData++;
+        }
+
+        return true;
+    }
+
+
     /**
      * \brief Lossy data compression by linear or logarithmic quantization (16 bit)
      *

diff --git a/sql_builtin_functions.hpp b/sql_builtin_functions.hpp
@@ -831,10 +831,20 @@ int blob_pack( const mxArray* pcItem, bool bStreamable,
     if( g_compression_level )
     {
         double start_time = utils_get_wall_time();
+        bool   status;
 
-        numericSequence.pack( value.Data(), value.ByData(), value.ByElement(), 
-                              value.IsDoubleClass() );  // allocates m_rdata
+#if MKSQLITE_CONFIG_USE_LOGGING
+        log_trace( "Start compression" );
+#endif
+        status = numericSequence.pack( value.Data(), value.ByData(), value.ByElement(), 
+                                       value.IsDoubleClass() );  // allocates m_rdata
 
+#if MKSQLITE_CONFIG_USE_LOGGING
+        if( !status )
+        {
+            log_trace( "Compression failed" );
+        }
+#endif
         *pdProcess_time = utils_get_wall_time() - start_time;
 
         // any compressed data omitted?
@@ -855,8 +865,14 @@ int blob_pack( const mxArray* pcItem, bool bStreamable,
             // calculate the compression ratio
             *pdRatio = (double)*pBlob_size / blob_size_uncompressed;
 
+#if MKSQLITE_CONFIG_USE_LOGGING
+            log_trace( "Compression (%ld <-- %ld)", (long)*pBlob_size, (long)blob_size_uncompressed );
+#endif
             if( *pBlob_size >= blob_size_uncompressed )
             {
+#if MKSQLITE_CONFIG_USE_LOGGING
+                log_trace( "Discard compression" );
+#endif
                 // Switch to uncompressed blob, it's not worth the effort.
                 numericSequence.free_result();
             }