Skip to content

Commit

Permalink
feat: indexing on scalar8
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <usamoi@outlook.com>
  • Loading branch information
usamoi committed Dec 17, 2024
1 parent f88b1ac commit dbd7659
Show file tree
Hide file tree
Showing 11 changed files with 308 additions and 20 deletions.
21 changes: 21 additions & 0 deletions src/sql/finalize.sql
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ CREATE OPERATOR FAMILY vector_cosine_ops USING vchordrq;
CREATE OPERATOR FAMILY halfvec_l2_ops USING vchordrq;
CREATE OPERATOR FAMILY halfvec_ip_ops USING vchordrq;
CREATE OPERATOR FAMILY halfvec_cosine_ops USING vchordrq;
-- Operator families for the scalar8 type on the vchordrq access method,
-- one per distance flavour (l2, ip, cosine). The operator classes of the
-- same names are attached to these families further down in this file.
CREATE OPERATOR FAMILY scalar8_l2_ops USING vchordrq;
CREATE OPERATOR FAMILY scalar8_ip_ops USING vchordrq;
CREATE OPERATOR FAMILY scalar8_cosine_ops USING vchordrq;

CREATE OPERATOR FAMILY vector_l2_ops USING Vchordrqfscan;
CREATE OPERATOR FAMILY vector_ip_ops USING Vchordrqfscan;
Expand Down Expand Up @@ -199,6 +202,24 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
OPERATOR 2 <<=>> (halfvec, sphere_halfvec) FOR SEARCH,
FUNCTION 1 _vchordrq_support_halfvec_cosine_ops();

-- vchordrq operator class for scalar8 using the `<->` operator.
--   strategy 1: `<->` between two scalar8 values, used for ORDER BY
--               (results sorted via the float_ops family);
--   strategy 2: `<<->>` between a scalar8 and a sphere_scalar8, used for search;
--   support function 1: _vchordrq_support_scalar8_l2_ops().
CREATE OPERATOR CLASS scalar8_l2_ops
FOR TYPE scalar8 USING vchordrq FAMILY scalar8_l2_ops AS
OPERATOR 1 <-> (scalar8, scalar8) FOR ORDER BY float_ops,
OPERATOR 2 <<->> (scalar8, sphere_scalar8) FOR SEARCH,
FUNCTION 1 _vchordrq_support_scalar8_l2_ops();

-- vchordrq operator class for scalar8 using the `<#>` operator.
--   strategy 1: `<#>` between two scalar8 values, used for ORDER BY
--               (results sorted via the float_ops family);
--   strategy 2: `<<#>>` between a scalar8 and a sphere_scalar8, used for search;
--   support function 1: _vchordrq_support_scalar8_ip_ops().
CREATE OPERATOR CLASS scalar8_ip_ops
FOR TYPE scalar8 USING vchordrq FAMILY scalar8_ip_ops AS
OPERATOR 1 <#> (scalar8, scalar8) FOR ORDER BY float_ops,
OPERATOR 2 <<#>> (scalar8, sphere_scalar8) FOR SEARCH,
FUNCTION 1 _vchordrq_support_scalar8_ip_ops();

-- vchordrq operator class for scalar8 using the `<=>` operator.
--   strategy 1: `<=>` between two scalar8 values, used for ORDER BY
--               (results sorted via the float_ops family);
--   strategy 2: `<<=>>` between a scalar8 and a sphere_scalar8, used for search;
--   support function 1: _vchordrq_support_scalar8_cosine_ops().
CREATE OPERATOR CLASS scalar8_cosine_ops
FOR TYPE scalar8 USING vchordrq FAMILY scalar8_cosine_ops AS
OPERATOR 1 <=> (scalar8, scalar8) FOR ORDER BY float_ops,
OPERATOR 2 <<=>> (scalar8, sphere_scalar8) FOR SEARCH,
FUNCTION 1 _vchordrq_support_scalar8_cosine_ops();

CREATE OPERATOR CLASS vector_l2_ops
FOR TYPE vector USING Vchordrqfscan FAMILY vector_l2_ops AS
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
Expand Down
8 changes: 4 additions & 4 deletions src/vchordrq/algorithm/insert.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::postgres::Relation;
use crate::vchordrq::algorithm::rabitq::fscan_process_lowerbound;
use crate::vchordrq::algorithm::rabitq::process_lowerbound;
use crate::vchordrq::algorithm::tuples::*;
use crate::vchordrq::algorithm::vectors;
use base::always_equal::AlwaysEqual;
Expand Down Expand Up @@ -31,7 +31,7 @@ pub fn insert<V: Vector>(
let vector = vector.as_borrowed();
let is_residual = meta_tuple.is_residual;
let default_lut = if !is_residual {
Some(V::rabitq_fscan_preprocess(vector))
Some(V::rabitq_preprocess(vector))
} else {
None
};
Expand Down Expand Up @@ -74,7 +74,7 @@ pub fn insert<V: Vector>(
let mut results = Vec::new();
{
let lut = if is_residual {
&V::rabitq_fscan_preprocess(
&V::rabitq_preprocess(
V::residual(vector, list.1.as_ref().map(|x| x.as_borrowed()).unwrap())
.as_borrowed(),
)
Expand All @@ -91,7 +91,7 @@ pub fn insert<V: Vector>(
.map(rkyv::check_archived_root::<Height1Tuple>)
.expect("data corruption")
.expect("data corruption");
let lowerbounds = fscan_process_lowerbound(
let lowerbounds = process_lowerbound(
distance_kind,
dims,
lut,
Expand Down
8 changes: 4 additions & 4 deletions src/vchordrq/algorithm/rabitq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,19 @@ pub fn code(dims: u32, vector: &[f32]) -> Code {

/// Lookup table built from a query vector by `preprocess`.
///
/// Fields, in order: `(dis_v_2, b, k, qvector_sum, binarize(&qvector))`, where
/// `dis_v_2` is the sum of squares of the input vector, `(k, b, qvector)` come
/// from quantizing the input, and `qvector_sum` is the sum of the quantized
/// codes. Note that `b` precedes `k` in the tuple.
pub type Lut = (f32, f32, f32, f32, (Vec<u64>, Vec<u64>, Vec<u64>, Vec<u64>));

pub fn fscan_preprocess(vector: &[f32]) -> Lut {
pub fn preprocess(vector: &[f32]) -> Lut {
use base::simd::quantize;
let dis_v_2 = f32::reduce_sum_of_x2(vector);
let (k, b, qvector) = quantize::quantize(vector, 15.0);
let qvector_sum = if vector.len() <= 4369 {
let qvector_sum = if qvector.len() <= 4369 {
base::simd::u8::reduce_sum_of_x_as_u16(&qvector) as f32
} else {
base::simd::u8::reduce_sum_of_x_as_u32(&qvector) as f32
};
(dis_v_2, b, k, qvector_sum, binarize(&qvector))
}

pub fn fscan_process_lowerbound(
pub fn process_lowerbound(
distance_kind: DistanceKind,
_dims: u32,
lut: &Lut,
Expand Down Expand Up @@ -104,7 +104,7 @@ pub fn fscan_process_lowerbound(
}
}

fn binarize(vector: &[u8]) -> (Vec<u64>, Vec<u64>, Vec<u64>, Vec<u64>) {
pub fn binarize(vector: &[u8]) -> (Vec<u64>, Vec<u64>, Vec<u64>, Vec<u64>) {
let n = vector.len();
let mut t0 = vec![0u64; n.div_ceil(64)];
let mut t1 = vec![0u64; n.div_ceil(64)];
Expand Down
12 changes: 6 additions & 6 deletions src/vchordrq/algorithm/scan.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::postgres::Relation;
use crate::vchordrq::algorithm::rabitq::fscan_process_lowerbound;
use crate::vchordrq::algorithm::rabitq::process_lowerbound;
use crate::vchordrq::algorithm::tuples::*;
use crate::vchordrq::algorithm::vectors;
use base::always_equal::AlwaysEqual;
Expand Down Expand Up @@ -32,7 +32,7 @@ pub fn scan<V: Vector>(
let vector = V::random_projection(vector);
let is_residual = meta_tuple.is_residual;
let default_lut = if !is_residual {
Some(V::rabitq_fscan_preprocess(vector.as_borrowed()))
Some(V::rabitq_preprocess(vector.as_borrowed()))
} else {
None
};
Expand All @@ -53,7 +53,7 @@ pub fn scan<V: Vector>(
let mut results = Vec::new();
for list in lists {
let lut = if is_residual {
&V::rabitq_fscan_preprocess(
&V::rabitq_preprocess(
V::residual(
vector.as_borrowed(),
list.1.as_ref().map(|x| x.as_borrowed()).unwrap(),
Expand All @@ -73,7 +73,7 @@ pub fn scan<V: Vector>(
.map(rkyv::check_archived_root::<Height1Tuple>)
.expect("data corruption")
.expect("data corruption");
let lowerbounds = fscan_process_lowerbound(
let lowerbounds = process_lowerbound(
distance_kind,
dims,
lut,
Expand Down Expand Up @@ -125,7 +125,7 @@ pub fn scan<V: Vector>(
let mut results = Vec::new();
for list in lists {
let lut = if is_residual {
&V::rabitq_fscan_preprocess(
&V::rabitq_preprocess(
V::residual(
vector.as_borrowed(),
list.1.as_ref().map(|x| x.as_borrowed()).unwrap(),
Expand All @@ -145,7 +145,7 @@ pub fn scan<V: Vector>(
.map(rkyv::check_archived_root::<Height0Tuple>)
.expect("data corruption")
.expect("data corruption");
let lowerbounds = fscan_process_lowerbound(
let lowerbounds = process_lowerbound(
distance_kind,
dims,
lut,
Expand Down
153 changes: 148 additions & 5 deletions src/vchordrq/algorithm/tuples.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use super::rabitq::{self, Code, Lut};
use crate::types::scalar8::Scalar8Owned;
use crate::vchordrq::types::OwnedVector;
use base::distance::DistanceKind;
use base::simd::ScalarLike;
use base::vector::VectorBorrowed;
use base::vector::{VectOwned, VectorOwned};
use half::f16;
use rkyv::{Archive, ArchiveUnsized, CheckBytes, Deserialize, Serialize};
Expand Down Expand Up @@ -56,7 +58,7 @@ pub trait Vector: VectorOwned {

fn residual(vector: Self::Borrowed<'_>, center: Self::Borrowed<'_>) -> Self;

fn rabitq_fscan_preprocess(vector: Self::Borrowed<'_>) -> Lut;
fn rabitq_preprocess(vector: Self::Borrowed<'_>) -> Lut;

fn rabitq_code(dims: u32, vector: Self::Borrowed<'_>) -> Code;

Expand Down Expand Up @@ -129,8 +131,8 @@ impl Vector for VectOwned<f32> {
Self::new(ScalarLike::vector_sub(vector.slice(), center.slice()))
}

fn rabitq_fscan_preprocess(vector: Self::Borrowed<'_>) -> Lut {
rabitq::fscan_preprocess(vector.slice())
fn rabitq_preprocess(vector: Self::Borrowed<'_>) -> Lut {
rabitq::preprocess(vector.slice())
}

fn rabitq_code(dims: u32, vector: Self::Borrowed<'_>) -> Code {
Expand Down Expand Up @@ -212,8 +214,8 @@ impl Vector for VectOwned<f16> {
Self::new(ScalarLike::vector_sub(vector.slice(), center.slice()))
}

fn rabitq_fscan_preprocess(vector: Self::Borrowed<'_>) -> Lut {
rabitq::fscan_preprocess(&f16::vector_to_f32(vector.slice()))
fn rabitq_preprocess(vector: Self::Borrowed<'_>) -> Lut {
rabitq::preprocess(&f16::vector_to_f32(vector.slice()))
}

fn rabitq_code(dims: u32, vector: Self::Borrowed<'_>) -> Code {
Expand All @@ -229,6 +231,147 @@ impl Vector for VectOwned<f16> {
}
}

/// `Vector` implementation for 8-bit scalar-quantized vectors.
///
/// Every element is stored as a `u8` code; the dequantized value used
/// throughout this impl is `k * code + b`. The per-vector metadata is the
/// tuple `(sum_of_x2, k, b, sum_of_code)`.
impl Vector for Scalar8Owned {
    /// `(sum_of_x2, k, b, sum_of_code)`, in the order produced by `vector_split`.
    type Metadata = (f32, f32, f32, f32);

    type Element = u8;

    /// Copies the four metadata fields out of their archived representation.
    fn metadata_from_archived(
        archived: &<Self::Metadata as ArchiveUnsized>::Archived,
    ) -> Self::Metadata {
        (archived.0, archived.1, archived.2, archived.3)
    }

    /// Splits a vector into its metadata and one or more slices of its code.
    ///
    /// Codes of up to 3840 elements are kept whole, codes of 3841..=5120
    /// elements are cut at element 2560, and longer codes are cut into chunks
    /// of at most 7680 elements.
    // NOTE(review): the thresholds presumably keep each slice within one
    // storage tuple; confirm against the tuple layout before changing them.
    fn vector_split(vector: Self::Borrowed<'_>) -> (Self::Metadata, Vec<&[Self::Element]>) {
        let code = vector.code();
        let metadata = (
            vector.sum_of_x2(),
            vector.k(),
            vector.b(),
            vector.sum_of_code(),
        );
        let slices = match code.len() {
            0..=3840 => vec![code],
            3841..=5120 => vec![&code[..2560], &code[2560..]],
            5121.. => code.chunks(7680).collect(),
        };
        (metadata, slices)
    }

    /// Rebuilds a vector from its metadata and the full (re-joined) code.
    fn vector_merge(metadata: Self::Metadata, slice: &[Self::Element]) -> Self {
        let (sum_of_x2, k, b, sum_of_code) = metadata;
        Scalar8Owned::new(sum_of_x2, k, b, sum_of_code, slice.to_vec())
    }

    /// Extracts the `Scalar8` payload of an `OwnedVector`.
    ///
    /// Any other variant is treated as a broken invariant and panics via
    /// `unreachable!()`.
    fn from_owned(vector: OwnedVector) -> Self {
        match vector {
            OwnedVector::Scalar8(x) => x,
            _ => unreachable!(),
        }
    }

    /// `(distance kind, running sum of code products, number of elements seen)`.
    type DistanceAccumulator = (DistanceKind, u32, u32);

    fn distance_begin(distance_kind: DistanceKind) -> Self::DistanceAccumulator {
        (distance_kind, 0, 0)
    }

    /// Folds one pair of code slices into the accumulator.
    ///
    /// Both L2 and Dot only need the sum of code products here; the
    /// difference between them is applied in `distance_end`.
    fn distance_next(
        accumulator: &mut Self::DistanceAccumulator,
        left: &[Self::Element],
        right: &[Self::Element],
    ) {
        match accumulator.0 {
            DistanceKind::L2 | DistanceKind::Dot => {
                accumulator.1 += base::simd::u8::reduce_sum_of_xy(left, right);
            }
            DistanceKind::Hamming | DistanceKind::Jaccard => unreachable!(),
        }
        accumulator.2 += left.len() as u32;
    }

    /// Turns the accumulated code statistics into a distance.
    ///
    /// With `u_i = k_u * cu_i + b_u` and `v_i = k_v * cv_i + b_v`, the inner
    /// product expands to
    /// `k_u*k_v*sum(cu_i*cv_i) + b_u*b_v*n + k_u*b_v*sum(cu_i) + b_u*k_v*sum(cv_i)`.
    /// L2 returns `sum_of_x2_u + sum_of_x2_v - 2 * <u, v>`; Dot returns `-<u, v>`.
    fn distance_end(
        accumulator: Self::DistanceAccumulator,
        (sum_of_x2_u, k_u, b_u, sum_of_code_u): Self::Metadata,
        (sum_of_x2_v, k_v, b_v, sum_of_code_v): Self::Metadata,
    ) -> f32 {
        let (distance_kind, sum_of_xy, n) = accumulator;
        let xy = k_u * k_v * sum_of_xy as f32
            + b_u * b_v * n as f32
            + k_u * b_v * sum_of_code_u
            + b_u * k_v * sum_of_code_v;
        match distance_kind {
            DistanceKind::L2 => sum_of_x2_u + sum_of_x2_v - 2.0 * xy,
            DistanceKind::Dot => -xy,
            DistanceKind::Hamming | DistanceKind::Jaccard => unreachable!(),
        }
    }

    /// Returns an owned copy of `vector`; no projection is applied here.
    fn random_projection(vector: Self::Borrowed<'_>) -> Self {
        vector.own()
    }

    /// Residuals are not implemented for `Scalar8`; calling this panics.
    fn residual(_: Self::Borrowed<'_>, _: Self::Borrowed<'_>) -> Self {
        unimplemented!()
    }

    /// Builds the RaBitQ lookup table directly from the stored 8-bit code.
    ///
    /// The 8-bit code (0..=255) is re-quantized to 4 bits (0..=15) with
    /// `(x + 8) / 17`, and the scale `k` is multiplied by 17 to compensate.
    /// The returned tuple has the same field order as the one built by
    /// `rabitq::preprocess`: `(dis_v_2, b, k, qvector_sum, binarized code)`.
    fn rabitq_preprocess(vector: Self::Borrowed<'_>) -> Lut {
        // `dis_v_2` is the squared norm of the vector, as in
        // `rabitq::preprocess`; the sum of codes must not be used here.
        let dis_v_2 = vector.sum_of_x2();
        let k = vector.k() * 17.0;
        let b = vector.b();
        let qvector = vector
            .code()
            .iter()
            .map(|&x| ((x as u32 + 8) / 17) as u8)
            .collect::<Vec<_>>();
        // Each 4-bit code is at most 15, so up to 4369 of them sum to at most
        // 4369 * 15 = 65535 = u16::MAX; longer vectors take the u32 reduction.
        let qvector_sum = if qvector.len() <= 4369 {
            base::simd::u8::reduce_sum_of_x_as_u16(&qvector) as f32
        } else {
            base::simd::u8::reduce_sum_of_x_as_u32(&qvector) as f32
        };
        (dis_v_2, b, k, qvector_sum, rabitq::binarize(&qvector))
    }

    /// Computes the RaBitQ code of the dequantized vector.
    fn rabitq_code(dims: u32, vector: Self::Borrowed<'_>) -> Code {
        let dequantized = Self::build_to_vecf32(vector);
        rabitq::code(dims, &dequantized)
    }

    /// Dequantizes the code to `f32` values as `k * code + b`.
    fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec<f32> {
        let (k, b) = (vector.k(), vector.b());
        vector.code().iter().map(|&x| k * x as f32 + b).collect()
    }

    /// Quantizes an `f32` slice to 8 bits (scale 255.0) and records the
    /// squared norm of the input together with the sum of the resulting codes.
    fn build_from_vecf32(x: &[f32]) -> Self {
        let sum_of_x2 = f32::reduce_sum_of_x2(x);
        let (k, b, code) =
            base::simd::quantize::quantize(f32::vector_to_f32_borrowed(x).as_ref(), 255.0);
        let sum_of_code = base::simd::u8::reduce_sum_of_x_as_u32(&code) as f32;
        Self::new(sum_of_x2, k, b, sum_of_code, code)
    }
}

#[derive(Clone, PartialEq, Archive, Serialize, Deserialize)]
#[archive(check_bytes)]
pub struct MetaTuple {
Expand Down
Loading

0 comments on commit dbd7659

Please sign in to comment.