Skip to content

Commit

Permalink
Fix right, full join handling when having multiple non-matching rows …
Browse files Browse the repository at this point in the history
…at the left side (apache#845)

* Fix right, full join handling

* Remove comment

* Clippy

* Update datafusion/src/physical_plan/hash_join.rs

Co-authored-by: Andrew Lamb <[email protected]>

* Fmt

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Dandandan and alamb authored Aug 10, 2021
1 parent 0125451 commit 5cadc6a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 17 deletions.
20 changes: 9 additions & 11 deletions datafusion/src/physical_plan/hash_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ fn build_join_indexes(
for (row, hash_value) in hash_values.iter().enumerate() {
match left.0.get(*hash_value, |(hash, _)| *hash_value == *hash) {
Some((_, indices)) => {
let mut no_match = true;
for &i in indices {
if equal_rows(
i as usize,
Expand All @@ -745,9 +746,14 @@ fn build_join_indexes(
&keys_values,
)? {
left_indices.append_value(i)?;
} else {
left_indices.append_null()?;
right_indices.append_value(row as u32)?;
no_match = false;
}
}
// If no rows matched left, still must keep the right
// with all nulls for left
if no_match {
left_indices.append_null()?;
right_indices.append_value(row as u32)?;
}
}
Expand All @@ -768,7 +774,7 @@ macro_rules! equal_rows_elem {
let left_array = $l.as_any().downcast_ref::<$array_type>().unwrap();
let right_array = $r.as_any().downcast_ref::<$array_type>().unwrap();

match (left_array.is_null($left), left_array.is_null($right)) {
match (left_array.is_null($left), right_array.is_null($right)) {
(false, false) => left_array.value($left) == right_array.value($right),
_ => false,
}
Expand Down Expand Up @@ -1372,8 +1378,6 @@ mod tests {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn join_full_multi_batch() {
let left = build_table(
("a1", &vec![1, 2, 3]),
Expand Down Expand Up @@ -1639,8 +1643,6 @@ mod tests {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn join_right_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
Expand Down Expand Up @@ -1677,8 +1679,6 @@ mod tests {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn partitioned_join_right_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
Expand Down Expand Up @@ -1716,8 +1716,6 @@ mod tests {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn join_full_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
Expand Down
6 changes: 0 additions & 6 deletions datafusion/tests/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1797,8 +1797,6 @@ async fn equijoin_left_and_condition_from_right() -> Result<()> {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn equijoin_right_and_condition_from_left() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let sql =
Expand Down Expand Up @@ -1852,8 +1850,6 @@ async fn left_join() -> Result<()> {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn right_join() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let equivalent_sql = [
Expand All @@ -1874,8 +1870,6 @@ async fn right_join() -> Result<()> {
}

#[tokio::test]
// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
#[cfg(not(feature = "force_hash_collisions"))]
async fn full_join() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let equivalent_sql = [
Expand Down

0 comments on commit 5cadc6a

Please sign in to comment.