Skip to content

Commit d103573

Browse files
authored
fix: boolean and/or expressions with null (#3544)
Resolves #3512

A fun truth table ($V_{x}$ is the validity of $x$, $L$ is left, and $R$ is right; ✅ means true/valid, ⬜️ means false/invalid, and an empty cell means the result is null):

| $L$ | $V_{L}$ | $R$ | $V_{R}$ | $∧$ | $V_{∧}$ | $∨$ | $V_{∨}$ |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
| ⬜️ | ⬜️ | ⬜️ | ⬜️ | | ⬜️ | | ⬜️ |
| ⬜️ | ⬜️ | ⬜️ | ✅ | ⬜️ | ✅ | | ⬜️ |
| ⬜️ | ⬜️ | ✅ | ⬜️ | | ⬜️ | | ⬜️ |
| ⬜️ | ⬜️ | ✅ | ✅ | | ⬜️ | ✅ | ✅ |
| ⬜️ | ✅ | ⬜️ | ⬜️ | ⬜️ | ✅ | | ⬜️ |
| ⬜️ | ✅ | ⬜️ | ✅ | ⬜️ | ✅ | ⬜️ | ✅ |
| ⬜️ | ✅ | ✅ | ⬜️ | ⬜️ | ✅ | | ⬜️ |
| ⬜️ | ✅ | ✅ | ✅ | ⬜️ | ✅ | ✅ | ✅ |
| ✅ | ⬜️ | ⬜️ | ⬜️ | | ⬜️ | | ⬜️ |
| ✅ | ⬜️ | ⬜️ | ✅ | ⬜️ | ✅ | | ⬜️ |
| ✅ | ⬜️ | ✅ | ⬜️ | | ⬜️ | | ⬜️ |
| ✅ | ⬜️ | ✅ | ✅ | | ⬜️ | ✅ | ✅ |
| ✅ | ✅ | ⬜️ | ⬜️ | | ⬜️ | ✅ | ✅ |
| ✅ | ✅ | ⬜️ | ✅ | ⬜️ | ✅ | ✅ | ✅ |
| ✅ | ✅ | ✅ | ⬜️ | | ⬜️ | ✅ | ✅ |
| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
1 parent e5ea73f commit d103573

File tree

3 files changed

+194
-66
lines changed

3 files changed

+194
-66
lines changed

src/daft-core/src/array/ops/comparison.rs

+104-36
Original file line numberDiff line numberDiff line change
@@ -601,13 +601,47 @@ impl Not for &BooleanArray {
601601
impl DaftLogical<&Self> for BooleanArray {
602602
type Output = DaftResult<Self>;
603603
fn and(&self, rhs: &Self) -> Self::Output {
604+
// When performing a logical AND with a NULL value:
605+
// - If the non-null value is false, the result is false (not null)
606+
// - If the non-null value is true, the result is null
607+
fn and_with_null(name: &str, arr: &BooleanArray) -> BooleanArray {
608+
let values = arr.as_arrow().values();
609+
610+
let new_validity = match arr.as_arrow().validity() {
611+
None => values.not(),
612+
Some(validity) => arrow2::bitmap::and(&values.not(), validity),
613+
};
614+
615+
BooleanArray::from((
616+
name,
617+
arrow2::array::BooleanArray::new(
618+
arrow2::datatypes::DataType::Boolean,
619+
values.clone(),
620+
Some(new_validity),
621+
),
622+
))
623+
}
624+
604625
match (self.len(), rhs.len()) {
605626
(x, y) if x == y => {
606-
let validity =
607-
arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity());
608-
609-
let result_bitmap =
610-
arrow2::bitmap::and(self.as_arrow().values(), rhs.as_arrow().values());
627+
let l_values = self.as_arrow().values();
628+
let r_values = rhs.as_arrow().values();
629+
630+
// (false & NULL) should be false, compute validity to ensure that
631+
let validity = match (self.as_arrow().validity(), rhs.as_arrow().validity()) {
632+
(None, None) => None,
633+
(None, Some(r_valid)) => Some(arrow2::bitmap::or(&l_values.not(), r_valid)),
634+
(Some(l_valid), None) => Some(arrow2::bitmap::or(l_valid, &r_values.not())),
635+
(Some(l_valid), Some(r_valid)) => Some(arrow2::bitmap::or(
636+
&arrow2::bitmap::or(
637+
&arrow2::bitmap::and(&l_values.not(), l_valid),
638+
&arrow2::bitmap::and(&r_values.not(), r_valid),
639+
),
640+
&arrow2::bitmap::and(l_valid, r_valid),
641+
)),
642+
};
643+
644+
let result_bitmap = arrow2::bitmap::and(l_values, r_values);
611645
Ok(Self::from((
612646
self.name(),
613647
arrow2::array::BooleanArray::new(
@@ -617,18 +651,18 @@ impl DaftLogical<&Self> for BooleanArray {
617651
),
618652
)))
619653
}
620-
(l_size, 1) => {
654+
(_, 1) => {
621655
if let Some(value) = rhs.get(0) {
622656
self.and(value)
623657
} else {
624-
Ok(Self::full_null(self.name(), &DataType::Boolean, l_size))
658+
Ok(and_with_null(self.name(), self))
625659
}
626660
}
627-
(1, r_size) => {
661+
(1, _) => {
628662
if let Some(value) = self.get(0) {
629663
rhs.and(value)
630664
} else {
631-
Ok(Self::full_null(self.name(), &DataType::Boolean, r_size))
665+
Ok(and_with_null(self.name(), rhs))
632666
}
633667
}
634668
(l, r) => Err(DaftError::ValueError(format!(
@@ -640,13 +674,47 @@ impl DaftLogical<&Self> for BooleanArray {
640674
}
641675

642676
fn or(&self, rhs: &Self) -> Self::Output {
677+
// When performing a logical OR with a NULL value:
678+
// - If the non-null value is false, the result is null
679+
// - If the non-null value is true, the result is true (not null)
680+
fn or_with_null(name: &str, arr: &BooleanArray) -> BooleanArray {
681+
let values = arr.as_arrow().values();
682+
683+
let new_validity = match arr.as_arrow().validity() {
684+
None => values.clone(),
685+
Some(validity) => arrow2::bitmap::and(values, validity),
686+
};
687+
688+
BooleanArray::from((
689+
name,
690+
arrow2::array::BooleanArray::new(
691+
arrow2::datatypes::DataType::Boolean,
692+
values.clone(),
693+
Some(new_validity),
694+
),
695+
))
696+
}
697+
643698
match (self.len(), rhs.len()) {
644699
(x, y) if x == y => {
645-
let validity =
646-
arrow_bitmap_and_helper(self.as_arrow().validity(), rhs.as_arrow().validity());
647-
648-
let result_bitmap =
649-
arrow2::bitmap::or(self.as_arrow().values(), rhs.as_arrow().values());
700+
let l_values = self.as_arrow().values();
701+
let r_values = rhs.as_arrow().values();
702+
703+
// (true | NULL) should be true, compute validity to ensure that
704+
let validity = match (self.as_arrow().validity(), rhs.as_arrow().validity()) {
705+
(None, None) => None,
706+
(None, Some(r_valid)) => Some(arrow2::bitmap::or(l_values, r_valid)),
707+
(Some(l_valid), None) => Some(arrow2::bitmap::or(l_valid, r_values)),
708+
(Some(l_valid), Some(r_valid)) => Some(arrow2::bitmap::or(
709+
&arrow2::bitmap::or(
710+
&arrow2::bitmap::and(l_values, l_valid),
711+
&arrow2::bitmap::and(r_values, r_valid),
712+
),
713+
&arrow2::bitmap::and(l_valid, r_valid),
714+
)),
715+
};
716+
717+
let result_bitmap = arrow2::bitmap::or(l_values, r_values);
650718
Ok(Self::from((
651719
self.name(),
652720
arrow2::array::BooleanArray::new(
@@ -656,18 +724,18 @@ impl DaftLogical<&Self> for BooleanArray {
656724
),
657725
)))
658726
}
659-
(l_size, 1) => {
727+
(_, 1) => {
660728
if let Some(value) = rhs.get(0) {
661729
self.or(value)
662730
} else {
663-
Ok(Self::full_null(self.name(), &DataType::Boolean, l_size))
731+
Ok(or_with_null(self.name(), self))
664732
}
665733
}
666-
(1, r_size) => {
734+
(1, _) => {
667735
if let Some(value) = self.get(0) {
668736
rhs.or(value)
669737
} else {
670-
Ok(Self::full_null(self.name(), &DataType::Boolean, r_size))
738+
Ok(or_with_null(self.name(), rhs))
671739
}
672740
}
673741
(l, r) => Err(DaftError::ValueError(format!(
@@ -756,33 +824,33 @@ impl DaftCompare<&Self> for NullArray {
756824
impl DaftLogical<bool> for BooleanArray {
757825
type Output = DaftResult<Self>;
758826
fn and(&self, rhs: bool) -> Self::Output {
759-
let validity = self.as_arrow().validity();
760827
if rhs {
761828
Ok(self.clone())
762829
} else {
763-
use arrow2::{array, bitmap::Bitmap, datatypes::DataType};
764-
let arrow_array = array::BooleanArray::new(
765-
DataType::Boolean,
766-
Bitmap::new_zeroed(self.len()),
767-
validity.cloned(),
768-
);
769-
Ok(Self::from((self.name(), arrow_array)))
830+
Ok(Self::from((
831+
self.name(),
832+
arrow2::array::BooleanArray::new(
833+
arrow2::datatypes::DataType::Boolean,
834+
arrow2::bitmap::Bitmap::new_zeroed(self.len()),
835+
None, // false & x is always valid false for any x
836+
),
837+
)))
770838
}
771839
}
772840

773841
fn or(&self, rhs: bool) -> Self::Output {
774-
let validity = self.as_arrow().validity();
775842
if rhs {
776-
use arrow2::{array, bitmap::Bitmap, datatypes::DataType};
777-
let arrow_array = array::BooleanArray::new(
778-
DataType::Boolean,
779-
Bitmap::new_zeroed(self.len()).not(),
780-
validity.cloned(),
781-
);
782-
return Ok(Self::from((self.name(), arrow_array)));
843+
Ok(Self::from((
844+
self.name(),
845+
arrow2::array::BooleanArray::new(
846+
arrow2::datatypes::DataType::Boolean,
847+
arrow2::bitmap::Bitmap::new_trued(self.len()),
848+
None, // true | x is always valid true for any x
849+
),
850+
)))
851+
} else {
852+
Ok(self.clone())
783853
}
784-
785-
Ok(self.clone())
786854
}
787855

788856
fn xor(&self, rhs: bool) -> Self::Output {

0 commit comments

Comments
 (0)