1
- use std:: sync:: Arc ;
1
+ use std:: { cmp :: max , sync:: Arc } ;
2
2
3
3
use common_error:: { DaftError , DaftResult } ;
4
4
use common_runtime:: RuntimeRef ;
@@ -12,15 +12,73 @@ use super::intermediate_op::{
12
12
} ;
13
13
use crate :: NUM_CPUS ;
14
14
15
+ fn num_parallel_exprs ( projection : & [ ExprRef ] ) -> usize {
16
+ max (
17
+ projection
18
+ . iter ( )
19
+ . map ( |expr| expr. has_compute ( ) )
20
+ . filter ( |x| * x)
21
+ . count ( ) ,
22
+ 1 ,
23
+ )
24
+ }
25
+
15
26
pub struct ProjectOperator {
16
27
projection : Arc < Vec < ExprRef > > ,
28
+ max_concurrency : usize ,
29
+ parallel_exprs : usize ,
17
30
}
18
31
19
32
impl ProjectOperator {
20
- pub fn new ( projection : Vec < ExprRef > ) -> Self {
21
- Self {
33
+ pub fn new ( projection : Vec < ExprRef > ) -> DaftResult < Self > {
34
+ let ( max_concurrency, parallel_exprs) = Self :: get_optimal_allocation ( & projection) ?;
35
+ Ok ( Self {
22
36
projection : Arc :: new ( projection) ,
23
- }
37
+ max_concurrency,
38
+ parallel_exprs,
39
+ } )
40
+ }
41
+
42
+ // This function is used to determine the optimal allocation of concurrency and expression parallelism
43
+ fn get_optimal_allocation ( projection : & [ ExprRef ] ) -> DaftResult < ( usize , usize ) > {
44
+ let resource_request = get_resource_request ( projection) ;
45
+ // The number of CPUs available for the operator.
46
+ let available_cpus = match resource_request {
47
+ // If the resource request specifies a number of CPUs, the available cpus is the number of actual CPUs
48
+ // divided by the requested number of CPUs, clamped to (1, NUM_CPUS).
49
+ // E.g. if the resource request specifies 2 CPUs and NUM_CPUS is 4, the number of available cpus is 2.
50
+ Some ( resource_request) if resource_request. num_cpus ( ) . is_some ( ) => {
51
+ let requested_num_cpus = resource_request. num_cpus ( ) . unwrap ( ) ;
52
+ if requested_num_cpus > * NUM_CPUS as f64 {
53
+ Err ( DaftError :: ValueError ( format ! (
54
+ "Requested {} CPUs but found only {} available" ,
55
+ requested_num_cpus, * NUM_CPUS
56
+ ) ) )
57
+ } else {
58
+ Ok (
59
+ ( * NUM_CPUS as f64 / requested_num_cpus) . clamp ( 1.0 , * NUM_CPUS as f64 )
60
+ as usize ,
61
+ )
62
+ }
63
+ }
64
+ _ => Ok ( * NUM_CPUS ) ,
65
+ } ?;
66
+
67
+ let max_parallel_exprs = num_parallel_exprs ( projection) ;
68
+
69
+ // Calculate optimal concurrency using ceiling division
70
+ // Example: For 128 CPUs and 60 parallel expressions:
71
+ // max_concurrency = (128 + 60 - 1) / 60 = 3 concurrent tasks
72
+ // This ensures we never exceed max_parallel_exprs per task
73
+ let max_concurrency = ( available_cpus + max_parallel_exprs - 1 ) / max_parallel_exprs;
74
+
75
+ // Calculate actual parallel expressions per task using floor division
76
+ // Example: For 128 CPUs and 3 concurrent tasks:
77
+ // num_parallel_exprs = 128 / 3 = 42 parallel expressions per task
78
+ // This ensures even distribution across concurrent tasks
79
+ let num_parallel_exprs = available_cpus / max_concurrency;
80
+
81
+ Ok ( ( max_concurrency, num_parallel_exprs) )
24
82
}
25
83
}
26
84
@@ -33,9 +91,16 @@ impl IntermediateOperator for ProjectOperator {
33
91
runtime : & RuntimeRef ,
34
92
) -> IntermediateOpExecuteResult {
35
93
let projection = self . projection . clone ( ) ;
94
+ let num_parallel_exprs = self . parallel_exprs ;
36
95
runtime
37
96
. spawn ( async move {
38
- let out = input. eval_expression_list ( & projection) ?;
97
+ let out = if num_parallel_exprs > 1 {
98
+ input
99
+ . par_eval_expression_list ( & projection, num_parallel_exprs)
100
+ . await ?
101
+ } else {
102
+ input. eval_expression_list ( & projection) ?
103
+ } ;
39
104
Ok ( (
40
105
state,
41
106
IntermediateOperatorResult :: NeedMoreInput ( Some ( Arc :: new ( out) ) ) ,
@@ -49,26 +114,6 @@ impl IntermediateOperator for ProjectOperator {
49
114
}
50
115
51
116
fn max_concurrency ( & self ) -> DaftResult < usize > {
52
- let resource_request = get_resource_request ( & self . projection ) ;
53
- match resource_request {
54
- // If the resource request specifies a number of CPUs, the max concurrency is the number of CPUs
55
- // divided by the requested number of CPUs, clamped to (1, NUM_CPUS).
56
- // E.g. if the resource request specifies 2 CPUs and NUM_CPUS is 4, the max concurrency is 2.
57
- Some ( resource_request) if resource_request. num_cpus ( ) . is_some ( ) => {
58
- let requested_num_cpus = resource_request. num_cpus ( ) . unwrap ( ) ;
59
- if requested_num_cpus > * NUM_CPUS as f64 {
60
- Err ( DaftError :: ValueError ( format ! (
61
- "Requested {} CPUs but found only {} available" ,
62
- requested_num_cpus, * NUM_CPUS
63
- ) ) )
64
- } else {
65
- Ok (
66
- ( * NUM_CPUS as f64 / requested_num_cpus) . clamp ( 1.0 , * NUM_CPUS as f64 )
67
- as usize ,
68
- )
69
- }
70
- }
71
- _ => Ok ( * NUM_CPUS ) ,
72
- }
117
+ Ok ( self . max_concurrency )
73
118
}
74
119
}
0 commit comments