Skip to content

Commit

Permalink
generate descriptions for nested properties
Browse files Browse the repository at this point in the history
  • Loading branch information
skarim committed Oct 6, 2024
1 parent 124aa72 commit 8d405a7
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 30 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@flisk/analyze-tracking",
"version": "0.3.0",
"version": "0.3.1",
"description": "Analyzes tracking code in a project and generates data schemas",
"main": "src/index.js",
"bin": {
Expand Down
8 changes: 8 additions & 0 deletions schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@
"unknown"
],
"description": "Name of the platform where the event is sent"
},
"description": {
"type": "string",
"description": "Description of how the event is triggered"
}
},
"required": [
Expand All @@ -89,6 +93,10 @@
"$ref": "#/definitions/property"
}
}
},
"description": {
"type": "string",
"description": "Description of the event"
}
},
"required": [
Expand Down
110 changes: 83 additions & 27 deletions src/generateDescriptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,25 @@ const { zodResponseFormat } = require('openai/helpers/zod');
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const model = 'gpt-4o-mini';

function createPrompt(eventName, properties, implementations, codebaseDir) {
// Initialize the prompt
let prompt = `You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.\n\n`;

// Add event name
prompt += `Event Name: "${eventName}"\n\n`;

// Add properties
let prompt = `Event Name: "${eventName}"\n\n`;
prompt += `Properties:\n`;
for (const propName in properties) {
const prop = properties[propName];
prompt += `- "${propName}" (type: ${prop.type})\n`;

/**
 * Appends one bullet line per property to the enclosing `prompt` string,
 * recursing into nested `properties` with a deeper indent.
 *
 * @param {Object} properties - Map of property name to a definition with a
 *   `type` and, optionally, nested `properties`.
 * @param {string} [indent=''] - Prefix prepended to every line at this depth.
 */
function appendPropertiesToPrompt(properties, indent = '') {
  for (const name in properties) {
    const definition = properties[name];
    prompt += `${indent}- "${name}" (type: ${definition.type})\n`;

    // Leaf property: nothing further to list.
    if (!definition.properties) {
      continue;
    }

    prompt += `${indent} Sub-properties:\n`;
    appendPropertiesToPrompt(definition.properties, indent + ' ');
  }
}

appendPropertiesToPrompt(properties);

// Add implementations with code snippets
prompt += `\nImplementations:\n`;
for (const impl of implementations) {
Expand Down Expand Up @@ -53,26 +57,49 @@ function getCodeSnippet(filePath, lineNumber, contextLines = 5) {
}

function createEventDescriptionSchema(properties) {
/**
 * Builds the zod schema describing the LLM output for a single property.
 *
 * Every property gets a `description` string; a property that itself has
 * nested `properties` additionally gets a `properties` object whose keys
 * mirror the nested property names, built recursively.
 *
 * @param {Object} prop - Property definition, optionally with `properties`.
 * @returns {Object} The zod object schema for this property.
 */
function buildPropertySchema(prop) {
  const describeProperty = () =>
    z.string().describe('A maximum of 10 words describing the property and what it means');

  // Leaf property: only a description is requested.
  if (!prop.properties) {
    return z.object({ description: describeProperty() });
  }

  const nestedShape = {};
  for (const nestedName in prop.properties) {
    nestedShape[nestedName] = buildPropertySchema(prop.properties[nestedName]);
  }

  return z.object({
    description: describeProperty(),
    properties: z.object(nestedShape),
  });
}

// Define the schema for properties
const propertiesSchema = {};
for (const propName in properties) {
propertiesSchema[propName] = z.object({
description: z.string().describe('A maximum of 10 words describing the property and what it means'),
});
propertiesSchema[propName] = buildPropertySchema(properties[propName]);
}

// Define the schema for implementations
const implementationsSchema = z.array(
z.object({
description: z.string().describe('A maximum of 10 words describing when this event is triggered'),
description: z
.string()
.describe('A maximum of 10 words describing how this event is triggered without using the word "triggered"'),
path: z.string(),
line: z.number(),
})
);

// Construct the full schema
const eventDescriptionSchema = z.object({
eventDescription: z.string().describe('A maximum of 10 words describing the event and what it describes'),
eventDescription: z
.string()
.describe('A maximum of 10 words describing the event and what it tracks without using the word "tracks"'),
properties: z.object(propertiesSchema),
implementations: implementationsSchema,
});
Expand All @@ -83,11 +110,11 @@ function createEventDescriptionSchema(properties) {
async function sendPromptToLLM(prompt, schema) {
try {
const completion = await openai.beta.chat.completions.parse({
model: 'gpt-4o-mini',
model,
messages: [
{
role: 'system',
content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations',
content: 'You are an expert at structured data extraction. Generate detailed descriptions for the following analytics event, its properties, and implementations.',
},
{
role: 'user',
Expand All @@ -97,7 +124,10 @@ async function sendPromptToLLM(prompt, schema) {
response_format: zodResponseFormat(schema, 'event_description'),
});

return completion.choices[0].message.parsed;
return {
descriptions: completion.choices[0].message.parsed,
usage: completion.usage,
};
} catch (error) {
console.error('Error during LLM response parsing:', error);
return null;
Expand All @@ -115,36 +145,58 @@ async function generateEventDescription(eventName, event, codebaseDir) {
const eventDescriptionSchema = createEventDescriptionSchema(properties);

// Send prompt to the LLM and get the structured response
const descriptions = await sendPromptToLLM(prompt, eventDescriptionSchema);
const { descriptions, usage } = await sendPromptToLLM(prompt, eventDescriptionSchema);

return { eventName, descriptions };
return { eventName, descriptions, usage };
}

async function generateDescriptions(events, codebaseDir) {
console.log(`Generating descriptions using ${model}`);

const eventPromises = Object.entries(events).map(([eventName, event]) =>
generateEventDescription(eventName, event, codebaseDir)
);

console.log(`Running ${eventPromises.length} prompts in parallel...`);

const results = await Promise.all(eventPromises);

let promptTokens = 0;
let completionTokens = 0;

// Process results and update the events object
results.forEach(({ eventName, descriptions }) => {
results.forEach(({ eventName, descriptions, usage }) => {
if (descriptions) {
promptTokens += usage.prompt_tokens;
completionTokens += usage.completion_tokens;

const event = events[eventName];
event.description = descriptions.eventDescription;

// Update property descriptions
for (const propName in descriptions.properties) {
if (event.properties[propName]) {
event.properties[propName].description = descriptions.properties[propName].description;
// Update property descriptions recursively
/**
 * Copies generated descriptions onto the matching event properties,
 * descending into nested `properties` when both sides have them.
 *
 * Keys present in `descriptionProperties` but not declared as own keys of
 * `eventProperties` are ignored.
 *
 * @param {Object} eventProperties - Property definitions to annotate in place.
 * @param {Object} descriptionProperties - Generated data keyed by property
 *   name; each entry carries `description` and optionally `properties`.
 */
function updatePropertyDescriptions(eventProperties, descriptionProperties) {
  const hasOwn = Object.prototype.hasOwnProperty;

  for (const propName in descriptionProperties) {
    // A plain truthiness check would also match inherited members such as
    // `__proto__` or `toString`, and the assignment below would then write
    // onto Object.prototype or a built-in function. Require an own key.
    if (!hasOwn.call(eventProperties, propName)) {
      continue;
    }

    const target = eventProperties[propName];
    if (!target) {
      continue;
    }

    const generated = descriptionProperties[propName];
    target.description = generated.description;

    if (target.properties && generated.properties) {
      updatePropertyDescriptions(target.properties, generated.properties);
    }
  }
}

updatePropertyDescriptions(event.properties, descriptions.properties);

// Update implementations with descriptions
for (let i = 0; i < descriptions.implementations.length; i++) {
if (event.implementations[i]) {
if (event.implementations[i].path === descriptions.implementations[i].path &&
event.implementations[i].line === descriptions.implementations[i].line) {
if (
event.implementations[i].path === descriptions.implementations[i].path &&
event.implementations[i].line === descriptions.implementations[i].line
) {
event.implementations[i].description = descriptions.implementations[i].description;
} else {
console.error(`Returned implementation description does not match path or line for event: ${eventName}`);
Expand All @@ -156,6 +208,10 @@ async function generateDescriptions(events, codebaseDir) {
}
});

console.log(`Prompt tokens used: ${promptTokens}`);
console.log(`Completion tokens used: ${completionTokens}`);
console.log(`Total tokens used: ${promptTokens + completionTokens}`);

return events;
}

Expand Down

0 comments on commit 8d405a7

Please sign in to comment.