Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HCK-9133: vector RE #131

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const _ = require('lodash');
const { commentIfDeactivated, wrapInQuotes, wrapComment } = require('../../utils/general');
const assignTemplates = require('../../utils/assignTemplates');
const templates = require('../templates');
const { isVector, isString, isDateTime } = require('./typeHelper');

const addLength = (type, length) => {
return `${type}(${length})`;
Expand Down Expand Up @@ -58,8 +59,6 @@ const canHaveTimePrecision = type => ['time', 'timestamp'].includes(type);
/**
 * Tells whether the given type name is the one type this helper treats as scale-capable.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true only when `type` is exactly 'numeric'
 */
const canHaveScale = type => {
	const scalableType = 'numeric';

	return type === scalableType;
};
/**
 * Tells whether the given type name is one of the two types this helper accepts
 * a type modifier for.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when `type` is exactly 'geography' or 'geometry'
 */
const canHaveTypeModifier = type => type === 'geography' || type === 'geometry';

const isVector = type => ['vector', 'halfvec', 'sparsevec'].includes(type);

const decorateType = (type, columnDefinition) => {
const { length, precision, scale, typeModifier, srid, timezone, timePrecision, dimension, subtype, array_type } =
columnDefinition;
Expand All @@ -81,9 +80,6 @@ const decorateType = (type, columnDefinition) => {
return addArrayDecorator(type, array_type);
};

const isString = type => ['char', 'varchar', 'text', 'bit', 'varbit'].includes(type);
const isDateTime = type => ['date', 'time', 'timestamp', 'interval'].includes(type);

const decorateDefault = (type, defaultValue, isArrayType) => {
const constantsValues = ['current_timestamp', 'current_user', 'null'];
if ((isString(type) || isDateTime(type)) && !constantsValues.includes(_.toLower(defaultValue)) && !isArrayType) {
Expand Down
11 changes: 11 additions & 0 deletions forward_engineering/ddlProvider/ddlHelpers/typeHelper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Type names that isString() accepts (exact, case-sensitive match).
const STRING_TYPES = new Set(['char', 'varchar', 'text', 'bit', 'varbit']);

/**
 * Tells whether the given type name is one of the character/bit string type names.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when `type` is 'char', 'varchar', 'text', 'bit' or 'varbit'
 */
const isString = type => STRING_TYPES.has(type);

// Type names that isDateTime() accepts (exact, case-sensitive match).
const DATE_TIME_TYPES = new Set(['date', 'time', 'timestamp', 'interval']);

/**
 * Tells whether the given type name is one of the date/time type names.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when `type` is 'date', 'time', 'timestamp' or 'interval'
 */
const isDateTime = type => DATE_TIME_TYPES.has(type);

// Type names that isVector() accepts (exact, case-sensitive match).
const VECTOR_TYPES = new Set(['vector', 'halfvec', 'sparsevec']);

/**
 * Tells whether the given type name is one of the vector type names.
 *
 * @param {string} type - type name to test
 * @returns {boolean} true when `type` is 'vector', 'halfvec' or 'sparsevec'
 */
const isVector = type => VECTOR_TYPES.has(type);

module.exports = {
isString,
isDateTime,
isVector,
};
13 changes: 13 additions & 0 deletions reverse_engineering/helpers/getJsonSchema.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
const _ = require('lodash');
const { isVector } = require('../../forward_engineering/ddlProvider/ddlHelpers/typeHelper');

const getJsonSchema = columns => {
const properties = columns.reduce((properties, column) => {
if (column.properties) {
Expand All @@ -10,6 +13,16 @@ const getJsonSchema = columns => {
};
}

if (isVector(column.type)) {
return {
...properties,
[column.name]: {
...column,
items: _.fill(Array(column.dimension), { type: 'number', mode: 'real' }),
},
};
}

return {
...properties,
[column.name]: column,
Expand Down
48 changes: 41 additions & 7 deletions reverse_engineering/helpers/postgresHelpers/columnHelper.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const _ = require('lodash');
const { isVector } = require('../../../forward_engineering/ddlProvider/ddlHelpers/typeHelper');

const columnPropertiesMapper = {
column_default: 'default',
Expand All @@ -15,8 +16,21 @@ const columnPropertiesMapper = {
numeric_scale: 'scale',
datetime_precision: 'timePrecision',
attribute_mode: {
keyword: 'timePrecision',
check: (column, value) => value !== -1 && canHaveTimePrecision(column.data_type),
keyword: ({ column }) => {
if (isVector(column.udt_name)) {
return 'dimension';
}
return 'timePrecision';
},
check: (column, value) => {
if (!value || value === -1) {
return false;
}
if (isVector(column.udt_name)) {
return true;
}
return canHaveTimePrecision(column.data_type);
},
},
interval_type: 'intervalOptions',
collation_name: 'collationRule',
Expand All @@ -28,6 +42,8 @@ const columnPropertiesMapper = {
domain_name: 'domain_name',
};

// Properties deleted from every mapped column in mapColumnData, after setColumnType has been applied to it.
const keysToExclude = ['numberOfArrayDimensions', 'udt_name'];

const getColumnValue = (column, key, value) => {
if (columnPropertiesMapper[key]?.check) {
return columnPropertiesMapper[key].check(column, value) ? value : '';
Expand All @@ -36,16 +52,28 @@ const getColumnValue = (column, key, value) => {
return _.get(columnPropertiesMapper, `${key}.values.${value}`, value);
};

/**
 * Resolves the output property name for a raw column key by looking it up in
 * columnPropertiesMapper.
 *
 * A mapper entry may be a plain value (returned as is) or an object with a
 * `keyword` member. A function `keyword` is called with `{ column }` and its
 * result is returned; any other truthy `keyword` is returned directly.
 *
 * @param {object} params
 * @param {object} params.column - raw column record, forwarded to function keywords
 * @param {string} params.key - raw column key to translate
 * @returns {*} the mapped key, or the mapper entry itself (possibly undefined) when it has no truthy `keyword`
 */
const getColumnKey = ({ column, key }) => {
	const mapping = columnPropertiesMapper[key];

	// No keyword to resolve: fall back to the entry itself (plain string, object or undefined).
	if (!mapping?.keyword) {
		return mapping;
	}

	return typeof mapping.keyword === 'function' ? mapping.keyword({ column }) : mapping.keyword;
};

const mapColumnData = userDefinedTypes => column => {
return _.chain(column)
.toPairs()
.map(([key, value]) => [
columnPropertiesMapper[key]?.keyword || columnPropertiesMapper[key],
getColumnValue(column, key, value),
])
.map(([key, value]) => [getColumnKey({ column, key }), getColumnValue(column, key, value)])
.filter(([key, value]) => key && !_.isNil(value))
.fromPairs()
.thru(setColumnType(userDefinedTypes))
.thru(col => {
const columnWithType = setColumnType(userDefinedTypes)(col);
keysToExclude.forEach(key => delete columnWithType[key]);
return columnWithType;
})
.value();
};

Expand Down Expand Up @@ -187,6 +215,12 @@ const mapType = (userDefinedTypes, type) => {
case 'regrole':
case 'regtype':
return { type: 'oid', mode: type };
case 'vector':
return { type: 'vector', subtype: 'vector' };
case 'halfvec':
return { type: 'vector', subtype: 'halfvec' };
case 'sparsevec':
return { type: 'vector', subtype: 'sparsevec' };

default: {
if (_.some(userDefinedTypes, { name: type })) {
Expand Down
72 changes: 48 additions & 24 deletions reverse_engineering/helpers/queryConstants.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,19 +94,52 @@ const getGET_FUNCTIONS_WITH_PROCEDURES_ADDITIONAL = postgresVersion => {
FROM pg_catalog.pg_proc WHERE pronamespace = $1;`;
};

/**
 * Builds the query that lists the functions and procedures of one schema
 * (bound as `$1`), leaving out routines that depend on the given extensions.
 *
 * The exclusion subquery matches `information_schema.routines` rows to
 * `pg_proc` rows by routine name and schema, then follows `pg_depend` to
 * `pg_extension` to find routines tied to one of the listed extensions.
 *
 * @param {object} [options]
 * @param {string[]} [options.extensionsToExclude=[]] - extension names whose routines are filtered out
 * @returns {string} SQL text expecting the schema name as parameter `$1`
 */
const getGET_FUNCTIONS_WITH_PROCEDURES = ({ extensionsToExclude = [] } = {}) => {
	// Double embedded single quotes so a name can never terminate its SQL string literal.
	const extensionsStatement = extensionsToExclude.map(ext => `'${String(ext).replace(/'/g, "''")}'`).join(', ');

	// "IN ()" is a syntax error in PostgreSQL, so the filter is emitted only when there is something to exclude.
	const extensionsFilter = extensionsStatement
		? `
		AND specific_name NOT IN (
			SELECT r.specific_name
			FROM information_schema.routines r
			JOIN pg_proc p
				ON r.routine_name = p.proname
				AND r.specific_schema = (
					SELECT n.nspname
					FROM pg_namespace n
					WHERE n.oid = p.pronamespace
				)
			JOIN pg_depend d
				ON d.objid = p.oid
			JOIN pg_extension e
				ON d.refobjid = e.oid
			WHERE e.extname IN (${extensionsStatement})
		)`
		: '';

	return `
		SELECT
			specific_name,
			routine_name AS name,
			routine_type,
			routine_definition,
			external_language,
			security_type,
			type_udt_name AS return_data_type
		FROM information_schema.routines
		WHERE specific_schema = $1${extensionsFilter};
	`;
};

const queryConstants = {
PING: 'SELECT schema_name FROM information_schema.schemata LIMIT 1;',
GET_VERSION: 'SELECT version()',
GET_VERSION_AS_NUM: 'SHOW server_version_num;',
GET_SCHEMA_NAMES: 'SELECT schema_name FROM information_schema.schemata;',
GET_TABLE_NAMES: `
SELECT tables.table_name, tables.table_type FROM information_schema.tables AS tables
INNER JOIN
INNER JOIN
(SELECT
pg_class.relname AS table_name,
pg_namespace.nspname AS table_schema
FROM pg_catalog.pg_class AS pg_class
INNER JOIN pg_catalog.pg_namespace AS pg_namespace
INNER JOIN pg_catalog.pg_namespace AS pg_namespace
ON (pg_namespace.oid = pg_class.relnamespace)
WHERE pg_class.relispartition = false
AND pg_class.relkind = ANY('{"r","v","t","m","p"}'))
Expand All @@ -120,8 +153,8 @@ const queryConstants = {
GET_NAMESPACE_OID: 'SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = $1',
GET_TABLE_LEVEL_DATA: `
SELECT pc.oid, pc.relpersistence, pc.reloptions, pt.spcname, pg_get_expr(pc.relpartbound, pc.oid) AS partition_expr
FROM pg_catalog.pg_class AS pc
LEFT JOIN pg_catalog.pg_tablespace AS pt
FROM pg_catalog.pg_class AS pc
LEFT JOIN pg_catalog.pg_tablespace AS pt
ON pc.reltablespace = pt.oid
WHERE pc.relname = $1 AND pc.relnamespace = $2;`,
GET_TABLE_TOAST_OPTIONS: `
Expand Down Expand Up @@ -155,15 +188,15 @@ const queryConstants = {
GET_ROWS_COUNT: fullTableName => `SELECT COUNT(*) AS quantity FROM ${fullTableName};`,
GET_SAMPLED_DATA: (fullTableName, jsonColumns) => `SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1;`,
GET_SAMPLED_DATA_SIZE: (fullTableName, jsonColumns) => `
SELECT sum(pg_column_size(_hackolade_tmp_sampling_tbl.*)) AS _hackolade_tmp_sampling_tbl_size
SELECT sum(pg_column_size(_hackolade_tmp_sampling_tbl.*)) AS _hackolade_tmp_sampling_tbl_size
FROM (SELECT ${jsonColumns} FROM ${fullTableName} LIMIT $1) AS _hackolade_tmp_sampling_tbl;`,
GET_INHERITS_PARENT_TABLE_NAME: `
SELECT pc.relname AS parent_table_name FROM pg_catalog.pg_inherits AS pi
INNER JOIN pg_catalog.pg_class AS pc
ON pc.oid = pi.inhparent
WHERE pi.inhrelid = $1;`,
GET_TABLE_CONSTRAINTS: `
SELECT pcon.conname AS constraint_name,
SELECT pcon.conname AS constraint_name,
pcon.contype AS constraint_type,
pcon.connoinherit AS no_inherit,
pcon.conkey AS constraint_keys,
Expand All @@ -181,14 +214,14 @@ const queryConstants = {
GET_TABLE_INDEXES_V_10: getGET_TABLE_INDEXES(10),
GET_TABLE_INDEXES_V_15: getGET_TABLE_INDEXES(15),
GET_TABLE_FOREIGN_KEYS: `
SELECT pcon.conname AS relationship_name,
SELECT pcon.conname AS relationship_name,
pcon.conkey AS table_columns_positions,
pcon.confdeltype AS relationship_on_delete,
pcon.confupdtype AS relationship_on_update,
pcon.confmatchtype AS relationship_match,
pc_foreign_table.relname AS foreign_table_name,
pc_foreign_table.relname AS foreign_table_name,
ARRAY(
SELECT column_name::text FROM unnest(pcon.confkey) AS column_position
SELECT column_name::text FROM unnest(pcon.confkey) AS column_position
JOIN information_schema.columns ON (ordinal_position = column_position)
WHERE table_name = pc_foreign_table.relname AND table_schema = foreign_table_namespace.nspname)::text[] AS foreign_columns,
foreign_table_namespace.nspname AS foreign_table_schema
Expand All @@ -201,22 +234,13 @@ const queryConstants = {
GET_VIEW_DATA: `SELECT * FROM information_schema.views WHERE table_name = $1 AND table_schema = $2;`,
GET_VIEW_SELECT_STMT_FALLBACK: `SELECT definition FROM pg_views WHERE viewname = $1 AND schemaname = $2;`,
GET_VIEW_OPTIONS: `
SELECT oid,
SELECT oid,
reloptions AS view_options,
relpersistence AS persistence,
obj_description(oid, 'pg_class') AS description
FROM pg_catalog.pg_class
FROM pg_catalog.pg_class
WHERE relname = $1 AND relnamespace = $2;`,
GET_FUNCTIONS_WITH_PROCEDURES: `
SELECT specific_name,
routine_name AS name,
routine_type,
routine_definition,
external_language,
security_type,
type_udt_name AS return_data_type
FROM information_schema.routines
WHERE specific_schema=$1;`,
GET_FUNCTIONS_WITH_PROCEDURES: getGET_FUNCTIONS_WITH_PROCEDURES({ extensionsToExclude: ['vector'] }),
GET_FUNCTIONS_WITH_PROCEDURES_ARGS: `
SELECT parameter_name,
parameter_mode,
Expand Down Expand Up @@ -315,7 +339,7 @@ const queryConstants = {
(c.relname)::information_schema.sql_identifier,
em.num, ((t.tgtype)::integer & 1), ((t.tgtype)::integer & 66)
ORDER BY t.tgname))::information_schema.cardinal_number AS action_order,
(regexp_match(pg_get_triggerdef(t.oid), '.{35,} WHEN \((.+)\) EXECUTE FUNCTION'::text))[1]::information_schema.character_data AS action_condition,
(regexp_match(pg_get_triggerdef(t.oid), '.{35,} WHEN ((.+)) EXECUTE FUNCTION'::text))[1]::information_schema.character_data AS action_condition,
(SUBSTRING(pg_get_triggerdef(t.oid)
FROM (POSITION(('EXECUTE FUNCTION'::text) IN (SUBSTRING(pg_get_triggerdef(t.oid)
FROM 48))) + 47)))::information_schema.character_data AS action_statement,
Expand Down Expand Up @@ -363,7 +387,7 @@ const queryConstants = {
pg_class_referenced.relname AS referenced_table_name,
pg_namespace_referenced.nspname AS referenced_table_schema
FROM pg_catalog.pg_trigger as pg_trigger
LEFT JOIN pg_catalog.pg_proc AS pg_proc
LEFT JOIN pg_catalog.pg_proc AS pg_proc
ON (pg_trigger.tgfoid = pg_proc.oid)
LEFT JOIN pg_catalog.pg_attribute AS pg_attribute
ON (pg_attribute.attnum = ANY(pg_trigger.tgattr::int2[]) AND pg_trigger.tgrelid = pg_attribute.attrelid)
Expand All @@ -376,7 +400,7 @@ const queryConstants = {
LEFT JOIN pg_catalog.pg_namespace AS pg_namespace_referenced
ON(pg_namespace_referenced.oid = pg_class_referenced.relnamespace)
WHERE pg_class.relnamespace = $1 AND pg_class.oid = $2
GROUP BY
GROUP BY
trigger_name,
function_name,
"constraint",
Expand Down
Loading