cube-js · KSDaemon · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025
diff --git a/docs/pages/product/configuration/reference/environment-variables.mdx b/docs/pages/product/configuration/reference/environment-variables.mdx
@@ -1377,6 +1377,16 @@ See [this issue](https://github.com/cube-js/cube/issues/9285) for details.
 
 </ReferenceBox>
 
+## `CUBEJS_YAML_FOLDED_STRINGS_AS_LITERAL`
+
+If `true`, [folded strings][ref-yaml-folded-and-literal] (`>`) in YAML-based data
+model files are interpreted as literal strings (`|`): line breaks and indentation
+in multi-line strings are kept as written instead of being folded into spaces.
+
+| Possible Values | Default in Development | Default in Production |
+| --------------- | ---------------------- | --------------------- |
+| `true`, `false` | `true`                 | `true`                |
+
 ## `CUBEJS_WEB_SOCKETS`
 
 If `true`, then use WebSocket for data fetching.
@@ -1799,4 +1809,5 @@ The port for a Cube deployment to listen to API connections on.
 [ref-multi-stage-calculations]: /product/data-modeling/concepts/multi-stage-calculations
 [ref-folders]: /product/data-modeling/reference/view#folders
 [ref-dataviz-tools]: /product/configuration/visualization-tools
-[ref-context-to-app-id]: /product/configuration/reference/config#context_to_app_id
+[ref-context-to-app-id]: /product/configuration/reference/config#context_to_app_id
+[ref-yaml-folded-and-literal]: /product/data-modeling/dynamic/jinja#folded-and-literal-strings
@@ -82,7 +82,8 @@ export class YamlCompiler {
       return;
     }
 
-    const yamlObj: any = YAML.load(file.content);
+    const processedContent = this.preprocessYamlSqlMultilineValues(file.content);
+    const yamlObj: any = YAML.load(processedContent);
     if (!yamlObj) {
       return;
     }
@@ -345,4 +346,13 @@ export class YamlCompiler {
 
     return ast;
   }
+
+  /**
+   * Rewrites the folded block-scalar indicator (`>`) of `sql` keys to the
+   * literal indicator (`|`) before the YAML text is parsed, so that line breaks
+   * inside the SQL (and therefore `--` line comments) are kept as written.
+   *
+   * Only the `>` character itself is replaced; anything following it on the
+   * header line (for example a chomping indicator) is left untouched.
+   *
+   * NOTE(review): this method does not consult the documented
+   * `CUBEJS_YAML_FOLDED_STRINGS_AS_LITERAL` flag — confirm the gate is applied
+   * by the caller.
+   *
+   * @param yamlContent Raw text of a YAML data model file.
+   * @returns The text with `sql: >` headers turned into `sql: |`.
+   */
+  private preprocessYamlSqlMultilineValues(yamlContent: string): string {
+    // Anchor on a mapping key at the start of a line (optionally preceded by a
+    // `- ` list marker). Without the anchor, `sql: >` occurring mid-line inside
+    // another value, such as a quoted description, would be rewritten too.
+    return yamlContent.replace(
+      /^([ \t]*(?:-[ \t]+)?sql:\s*)>/gm,
+      '$1|'
+    );
+  }
 }
@@ -0,0 +1,131 @@
+import { PostgresQuery } from '../../src/adapter/PostgresQuery';
+import { prepareCompiler } from './PrepareCompiler';
+
+describe('JavaScript SQL Comments Preservation', () => {
+  it('preserves SQL comments in JS models', async () => {
+    // JS data model whose cube SQL carries `--` line comments between clauses.
+    const { compiler, joinGraph, cubeEvaluator } = prepareCompiler([
+      {
+        fileName: 'test.js',
+        content: `
+          cube('JSTestCube', {
+            sql: \`
+              SELECT
+                  r.id as record_id,
+                  r.created_at as record_created_at,
+                  -- Extract target_record_id from workspace association JSON
+                  JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as workspace_target_record_id,
+                  -- Get actual workspace name by joining with workspace record
+                  CASE
+                      WHEN workspace_name.value IS NOT NULL
+                      THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value')
+                      ELSE NULL
+                  END as workspace_name
+              FROM \\\`table\\\`.\\\`record\\\` r
+              JOIN \\\`table\\\`.\\\`object\\\` o ON r.object_id = o.id
+              -- Get company name
+              LEFT JOIN \\\`table\\\`.\\\`record_value\\\` company_name ON r.id = company_name.record_id
+                  AND company_name.name = 'name'
+              WHERE r._fivetran_deleted = FALSE
+                  AND o.singular_noun = 'Company'
+            \`,
+
+            dimensions: {
+              record_id: {
+                sql: 'record_id',
+                type: 'string',
+                primaryKey: true
+              }
+            },
+
+            measures: {
+              count: {
+                type: 'count'
+              }
+            }
+          });
+        `
+      }
+    ]);
+
+    await compiler.compile();
+
+    // Generate SQL for a minimal query so the cube's base SQL shows up in the output.
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['JSTestCube.count'],
+      dimensions: ['JSTestCube.record_id'],
+      timezone: 'UTC'
+    });
+    const generatedSql = query.buildSqlAndParams()[0];
+
+    // Every comment from the model must still be present in the generated SQL.
+    const expectedComments = [
+      '-- Extract target_record_id from workspace association JSON',
+      '-- Get actual workspace name by joining with workspace record',
+      '-- Get company name'
+    ];
+    expectedComments.forEach((comment) => {
+      expect(generatedSql).toContain(comment);
+    });
+
+    // A comment must occupy a line of its own rather than share one with other SQL.
+    const standaloneComment = generatedSql
+      .split('\n')
+      .find((sqlLine) => sqlLine.trim() === '-- Get company name');
+    expect(standaloneComment).toBeDefined();
+  });
+
+  it('handles edge cases in JS SQL strings', async () => {
+    // SQL comments containing quotes, punctuation and SQL-looking text.
+    const { compiler, joinGraph, cubeEvaluator } = prepareCompiler([
+      {
+        fileName: 'edge-cases.js',
+        content: `
+          cube('EdgeCasesTest', {
+            sql: \`
+              SELECT
+                  id,
+                  -- Comment with 'quotes' and "double quotes"
+                  name,
+                  -- Comment with special chars: !@#$%^&*()
+                  email,
+                  created_at
+              FROM users
+              -- SQL string in comment: SELECT * FROM table
+              WHERE active = true
+            \`,
+
+            dimensions: {
+              id: {
+                sql: 'id',
+                type: 'string',
+                primaryKey: true
+              }
+            },
+
+            measures: {
+              count: {
+                type: 'count'
+              }
+            }
+          });
+        `
+      }
+    ]);
+
+    await compiler.compile();
+
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['EdgeCasesTest.count'],
+      dimensions: ['EdgeCasesTest.id'],
+      timezone: 'UTC'
+    });
+
+    const [sql] = query.buildSqlAndParams();
+
+    const testLines = [
+      '-- Comment with \'quotes\' and "double quotes"',
+      '-- Comment with special chars: !@#$%^&*()',
+      '-- SQL string in comment: SELECT * FROM table',
+    ];
+
+    // Each comment must appear as a whole line (ignoring indentation).
+    // `toContain` is used so a failure prints the actual lines instead of
+    // a bare "expected false to be truthy".
+    const lines = sql.split('\n').map(l => l.trim());
+    for (const testLine of testLines) {
+      expect(lines).toContain(testLine);
+    }
+  });
+});
@@ -0,0 +1,174 @@
+import { PostgresQuery } from '../../src/adapter/PostgresQuery';
+import { prepareYamlCompiler } from './PrepareCompiler';
+
+describe('YAML SQL Formatting Preservation', () => {
+  it('handles sql: > (folded scalar)', async () => {
+    // The cube SQL is declared with a folded scalar header (extra spaces before `>`).
+    const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(
+      `
+      cubes:
+      - name: Orders
+        sql:     >
+          SELECT
+              r.id as record_id,
+              r.created_at as record_created_at,
+              -- Extract target_record_id from workspace association JSON
+              JSON_EXTRACT_SCALAR(workspace.value, '$[0].target_record_id') as workspace_target_record_id,
+              -- Get actual workspace name by joining with workspace record
+              CASE
+                  WHEN workspace_name.value IS NOT NULL
+                  THEN JSON_EXTRACT_SCALAR(JSON_EXTRACT_ARRAY(workspace_name.value)[OFFSET(0)], '$.value')
+                  ELSE NULL
+              END as workspace_name
+          FROM \`table\`.\`record\` r
+          JOIN \`table\`.\`object\` o ON r.object_id = o.id
+          -- Get company name
+          LEFT JOIN \`table\`.\`record_value\` company_name ON r.id = company_name.record_id
+              AND company_name.name = 'name'
+          WHERE r._fivetran_deleted = FALSE
+              AND o.singular_noun = 'Company'
+
+        dimensions:
+          - name: record_id
+            sql: record_id
+            type: string
+            primaryKey: true
+        measures:
+          - name: count
+            type: count
+      `
+    );
+
+    await compiler.compile();
+
+    // Generate SQL for a minimal query so the cube's base SQL shows up in the output.
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['Orders.count'],
+      dimensions: ['Orders.record_id'],
+      timezone: 'UTC'
+    });
+    const generatedSql = query.buildSqlAndParams()[0];
+
+    // Every comment from the model must still be present in the generated SQL.
+    const expectedComments = [
+      '-- Extract target_record_id from workspace association JSON',
+      '-- Get actual workspace name by joining with workspace record',
+      '-- Get company name'
+    ];
+    expectedComments.forEach((comment) => {
+      expect(generatedSql).toContain(comment);
+    });
+
+    // The key check: the comment sits on its own line, not merged into the previous one.
+    const standaloneComment = generatedSql
+      .split('\n')
+      .find((sqlLine) => sqlLine.trim() === '-- Get company name');
+    expect(standaloneComment).toBeDefined();
+  });
+
+  it('handles sql: | (literal scalar)', async () => {
+    // The cube SQL is already declared with a literal scalar.
+    const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(
+      `
+      cubes:
+      - name: TestCube
+        sql: |
+          SELECT id, name
+          -- Comment here
+          FROM table1
+          WHERE active = true
+
+        dimensions:
+          - name: id
+            sql: id
+            type: string
+            primaryKey: true
+        measures:
+          - name: count
+            type: count
+      `
+    );
+
+    await compiler.compile();
+
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['TestCube.count'],
+      dimensions: ['TestCube.id'],
+      timezone: 'UTC'
+    });
+    const generatedSql = query.buildSqlAndParams()[0];
+
+    // The comment must be present in the generated SQL...
+    expect(generatedSql).toContain('-- Comment here');
+
+    // ...and must occupy a line of its own.
+    const standaloneComment = generatedSql
+      .split('\n')
+      .find((sqlLine) => sqlLine.trim() === '-- Comment here');
+    expect(standaloneComment).toBeDefined();
+  });
+
+  it('handles single-line SQL without multilines', async () => {
+    // The cube SQL is a quoted one-line string, i.e. no block scalar is involved.
+    const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(
+      `
+      cubes:
+      - name: TestCube
+        sql: "SELECT id, name FROM table1"
+
+        dimensions:
+          - name: id
+            sql: id
+            type: string
+            primaryKey: true
+        measures:
+          - name: count
+            type: count
+      `
+    );
+
+    await compiler.compile();
+
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['TestCube.count'],
+      dimensions: ['TestCube.id'],
+      timezone: 'UTC'
+    });
+    const generatedSql = query.buildSqlAndParams()[0];
+
+    // The one-line statement must reach the generated SQL unchanged.
+    expect(generatedSql).toContain('SELECT id, name FROM table1');
+  });
+
+  it('works correctly for SQL without comments', async () => {
+    // Folded-scalar SQL that contains no `--` comments at all.
+    const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(
+      `
+      cubes:
+      - name: SimpleOrders
+        sql: >
+          SELECT
+              id,
+              amount,
+              status
+          FROM orders
+          WHERE active = true
+
+        dimensions:
+          - name: id
+            sql: id
+            type: string
+            primaryKey: true
+        measures:
+          - name: count
+            type: count
+      `
+    );
+
+    await compiler.compile();
+
+    // Generate SQL for a minimal query so the cube's base SQL shows up in the output.
+    const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
+      measures: ['SimpleOrders.count'],
+      dimensions: ['SimpleOrders.id'],
+      timezone: 'UTC'
+    });
+    const generatedSql = query.buildSqlAndParams()[0];
+
+    // The main clauses of the statement must still be found in the generated SQL.
+    const expectedFragments = ['SELECT', 'FROM orders', 'WHERE active = true'];
+    expectedFragments.forEach((fragment) => {
+      expect(generatedSql).toContain(fragment);
+    });
+  });
+});