Skip to content

Commit

Permalink
fix by comments
Browse files Browse the repository at this point in the history
Signed-off-by: cutecutecat <junyuchen@tensorchord.ai>
  • Loading branch information
cutecutecat committed Dec 24, 2024
1 parent dabca33 commit 144363f
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 34 deletions.
64 changes: 35 additions & 29 deletions src/vchordrq/algorithm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,48 +203,54 @@ impl Structure {
) -> Vec<Self> {
use std::collections::BTreeMap;
let VchordrqExternalBuildOptions { table } = external_build;
let dump_query = format!("SELECT id, parent, vector FROM {table};");
let table_name = table.split('.').last().unwrap().to_string();
let type_check_query = format!(
"SELECT COUNT(*)::INTEGER
FROM pg_catalog.pg_extension e
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace
LEFT JOIN information_schema.columns i ON i.udt_schema = n.nspname
WHERE e.extname = 'vector' AND i.udt_name = 'vector'
AND i.table_name = '{table_name}' AND i.column_name = 'vector';"
);
let mut parents = BTreeMap::new();
let mut vectors = BTreeMap::new();
pgrx::spi::Spi::connect(|client| {
use crate::datatype::memory_pgvector_vector::PgvectorVectorOutput;
use base::vector::VectorBorrowed;
use pgrx::pg_sys::panic::ErrorReportable;
// Check the column of centroid table named `vector`, which type should be pgvector::vector
let type_check = client
.select(&type_check_query, None, None)
.unwrap_or_report();
let count: Result<Option<i32>, _> = type_check.first().get_by_name("count");
if count != Ok(Some(1)) {
pgrx::warning!("{:?}", count);
// Get the schema of pgvector
let schema_query = format!(
"SELECT n.nspname::TEXT
FROM pg_catalog.pg_extension e
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace
LEFT JOIN information_schema.columns i ON i.udt_schema = n.nspname
WHERE e.extname = 'vector' AND i.table_name = '{table_name}' AND i.column_name = 'vector';");
let nspname: Vec<String> = client
.select(&schema_query, None, None)
.unwrap_or_report()
.map(|data| {
data.get_by_name("nspname")
.expect("external build: cannot get schema of pgvector")
.expect("external build: cannot get schema of pgvector")
})
.collect();
// Check the `vector` column is pgvector-based type
let pgvector_schema = if let [schema] = &nspname[..] {
schema.clone()
} else {
pgrx::error!(
"extern build: `vector` column should be pgvector::vector type at the centroid table"
"external build: `vector` column should be a pgvector type at the external table"
);
}
let table = client.select(&dump_query, None, None).unwrap_or_report();
for row in table {
};
let dump_query =
format!("SELECT id, parent, vector::{pgvector_schema}.vector FROM {table};");
let centroids = client.select(&dump_query, None, None).unwrap_or_report();
for row in centroids {
let id: Option<i32> = row.get_by_name("id").unwrap();
let parent: Option<i32> = row.get_by_name("parent").unwrap();
let vector: Option<PgvectorVectorOutput> = row.get_by_name("vector").unwrap();
let id = id.expect("extern build: id could not be NULL");
let vector = vector.expect("extern build: vector could not be NULL");
let id = id.expect("external build: id could not be NULL");
let vector = vector.expect("external build: vector could not be NULL");
let pop = parents.insert(id, parent);
if pop.is_some() {
pgrx::error!(
"extern build: there are at least two lines have same id, id = {id}"
"external build: there are at least two lines have same id, id = {id}"
);
}
if vector_options.dims != vector.as_borrowed().dims() {
pgrx::error!("extern build: incorrect dimension, id = {id}");
pgrx::error!("external build: incorrect dimension, id = {id}");
}
vectors.insert(id, crate::projection::project(vector.as_borrowed().slice()));
}
Expand Down Expand Up @@ -283,7 +289,7 @@ impl Structure {
parent.push(id);
} else {
pgrx::error!(
"extern build: parent does not exist, id = {id}, parent = {parent}"
"external build: parent does not exist, id = {id}, parent = {parent}"
);
}
} else {
Expand All @@ -295,7 +301,7 @@ impl Structure {
}
}
let Some(root) = root else {
pgrx::error!("extern build: there are no root");
pgrx::error!("external build: there are no root");
};
let mut heights = BTreeMap::<_, _>::new();
fn dfs_for_heights(
Expand All @@ -304,7 +310,7 @@ impl Structure {
u: i32,
) {
if heights.contains_key(&u) {
pgrx::error!("extern build: detect a cycle, id = {u}");
pgrx::error!("external build: detect a cycle, id = {u}");
}
heights.insert(u, None);
let mut height = None;
Expand All @@ -313,7 +319,7 @@ impl Structure {
let new = heights[&v].unwrap() + 1;
if let Some(height) = height {
if height != new {
pgrx::error!("extern build: two heights, id = {u}");
pgrx::error!("external build: two heights, id = {u}");
}
} else {
height = Some(new);
Expand All @@ -331,7 +337,7 @@ impl Structure {
.collect::<BTreeMap<_, _>>();
if !(1..=8).contains(&(heights[&root] - 1)) {
pgrx::error!(
"extern build: unexpected tree height, height = {}",
"external build: unexpected tree height, height = {}",
heights[&root]
);
}
Expand Down
29 changes: 24 additions & 5 deletions tests/logic/external_build.slt
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,23 @@ INSERT INTO vector_centroid (id, vector) VALUES
(2, '[0.0, 0.0, 1.0]');

statement ok
CREATE TABLE bad_type_centroid (id integer, parent integer, vector halfvec(3));
CREATE TABLE halfvec_centroid (id integer, parent integer, vector halfvec(3));

statement ok
INSERT INTO bad_type_centroid (id, vector) VALUES
INSERT INTO halfvec_centroid (id, vector) VALUES
(0, '[1.0, 0.0, 0.0]'),
(1, '[0.0, 1.0, 0.0]'),
(2, '[0.0, 0.0, 1.0]');

statement ok
CREATE TABLE bad_type_centroid (id integer, parent integer, vector real[]);

statement ok
INSERT INTO bad_type_centroid (id, vector) VALUES
(0, '{1.0, 0.0, 0.0}'),
(1, '{0.0, 1.0, 0.0}'),
(2, '{0.0, 0.0, 1.0}');

statement ok
CREATE TABLE bad_duplicate_id (id integer, parent integer, vector vector(3));

Expand Down Expand Up @@ -55,9 +64,19 @@ residual_quantization = true
table = 'public.vector_centroid'
$$);

# failed: bad vector bad_type
# external build for halfvec column by a halfvec table

statement ok
CREATE INDEX ON t USING vchordrq (val1 halfvec_l2_ops)
WITH (options = $$
residual_quantization = true
[build.external]
table = 'public.halfvec_centroid'
$$);

# failed: bad vector data type

statement error extern build: `vector` column should be pgvector::vector type at the centroid table
statement error external build: `vector` column should be a pgvector type at the external table
CREATE INDEX ON t USING vchordrq (val0 vector_l2_ops)
WITH (options = $$
residual_quantization = true
Expand All @@ -67,7 +86,7 @@ $$);

# failed: duplicate id

statement error extern build: there are at least two lines have same id, id = 1
statement error external build: there are at least two lines have same id, id = 1
CREATE INDEX ON t USING vchordrq (val0 vector_l2_ops)
WITH (options = $$
residual_quantization = true
Expand Down

0 comments on commit 144363f

Please sign in to comment.