From 8cb78391619968b1f596938d9a734b20b58f544e Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Wed, 13 Nov 2024 10:08:32 +0100 Subject: [PATCH] feat: Add IPC source node for new streaming engine (#19454) Co-authored-by: Orson Peters --- crates/polars-arrow/src/io/ipc/read/common.rs | 18 +- crates/polars-arrow/src/io/ipc/read/file.rs | 2 +- crates/polars-arrow/src/io/ipc/read/mod.rs | 1 + crates/polars-arrow/src/io/ipc/read/reader.rs | 90 ++- crates/polars-arrow/src/io/ipc/read/stream.rs | 17 +- crates/polars-arrow/src/record_batch.rs | 22 +- crates/polars-core/src/frame/mod.rs | 26 + .../polars-core/src/frame/upstream_traits.rs | 28 + crates/polars-core/src/scalar/from.rs | 3 + crates/polars-io/src/utils/other.rs | 8 +- crates/polars-plan/src/plans/optimizer/mod.rs | 3 +- .../src/plans/optimizer/slice_pushdown_lp.rs | 30 +- .../polars-stream/src/nodes/io_sources/ipc.rs | 557 ++++++++++++++++++ .../polars-stream/src/nodes/io_sources/mod.rs | 1 + crates/polars-stream/src/nodes/mod.rs | 1 + .../src/physical_plan/lower_ir.rs | 67 ++- .../src/physical_plan/to_graph.rs | 17 + py-polars/tests/unit/io/test_lazy_ipc.py | 1 + 18 files changed, 843 insertions(+), 49 deletions(-) create mode 100644 crates/polars-stream/src/nodes/io_sources/ipc.rs create mode 100644 crates/polars-stream/src/nodes/io_sources/mod.rs diff --git a/crates/polars-arrow/src/io/ipc/read/common.rs b/crates/polars-arrow/src/io/ipc/read/common.rs index 6b893c0e8ce3..0a1297bf1184 100644 --- a/crates/polars-arrow/src/io/ipc/read/common.rs +++ b/crates/polars-arrow/src/io/ipc/read/common.rs @@ -318,10 +318,14 @@ pub fn read_dictionary( Ok(()) } -pub fn prepare_projection( - schema: &ArrowSchema, - mut projection: Vec, -) -> (Vec, PlHashMap, ArrowSchema) { +#[derive(Clone)] +pub struct ProjectionInfo { + pub columns: Vec, + pub map: PlHashMap, + pub schema: ArrowSchema, +} + +pub fn prepare_projection(schema: &ArrowSchema, mut projection: Vec) -> ProjectionInfo { let schema = projection .iter() .map(|x| { @@ -355,7 +359,11 @@ pub fn prepare_projection( } } - (projection, map, schema) + ProjectionInfo { + columns: projection, + map, + schema, + } } pub fn apply_projection( diff --git a/crates/polars-arrow/src/io/ipc/read/file.rs b/crates/polars-arrow/src/io/ipc/read/file.rs index a83e1b758d80..e75fae36730e 100644 --- a/crates/polars-arrow/src/io/ipc/read/file.rs +++ b/crates/polars-arrow/src/io/ipc/read/file.rs @@ -305,7 +305,7 @@ fn get_message_from_block_offset<'a, R: Read + Seek>( .map_err(|err| polars_err!(oos = OutOfSpecKind::InvalidFlatbufferMessage(err))) } -fn get_message_from_block<'a, R: Read + Seek>( +pub(super) fn get_message_from_block<'a, R: Read + Seek>( reader: &mut R, block: &arrow_format::ipc::Block, message_scratch: &'a mut Vec, diff --git a/crates/polars-arrow/src/io/ipc/read/mod.rs b/crates/polars-arrow/src/io/ipc/read/mod.rs index 88411f9b905f..f4430db7dea2 100644 --- a/crates/polars-arrow/src/io/ipc/read/mod.rs +++ b/crates/polars-arrow/src/io/ipc/read/mod.rs @@ -19,6 +19,7 @@ mod schema; mod stream; pub(crate) use common::first_dict_field; +pub use common::{prepare_projection, ProjectionInfo}; pub use error::OutOfSpecKind; pub use file::{ deserialize_footer, get_row_count, read_batch, read_file_dictionaries, read_file_metadata, diff --git a/crates/polars-arrow/src/io/ipc/read/reader.rs b/crates/polars-arrow/src/io/ipc/read/reader.rs index 8369d2960233..e9523477fe39 100644 --- a/crates/polars-arrow/src/io/ipc/read/reader.rs +++ b/crates/polars-arrow/src/io/ipc/read/reader.rs @@ -1,9 +1,9 @@ use 
std::io::{Read, Seek}; use polars_error::PolarsResult; -use polars_utils::aliases::PlHashMap; use super::common::*; +use super::file::{get_message_from_block, get_record_batch}; use super::{read_batch, read_file_dictionaries, Dictionaries, FileMetadata}; use crate::array::Array; use crate::datatypes::ArrowSchema; @@ -16,7 +16,7 @@ pub struct FileReader { // the dictionaries are going to be read dictionaries: Option, current_block: usize, - projection: Option<(Vec, PlHashMap, ArrowSchema)>, + projection: Option, remaining: usize, data_scratch: Vec, message_scratch: Vec, @@ -32,10 +32,29 @@ impl FileReader { projection: Option>, limit: Option, ) -> Self { - let projection = projection.map(|projection| { - let (p, h, schema) = prepare_projection(&metadata.schema, projection); - (p, h, schema) - }); + let projection = + projection.map(|projection| prepare_projection(&metadata.schema, projection)); + Self { + reader, + metadata, + dictionaries: Default::default(), + projection, + remaining: limit.unwrap_or(usize::MAX), + current_block: 0, + data_scratch: Default::default(), + message_scratch: Default::default(), + } + } + + /// Creates a new [`FileReader`]. Use `projection` to only take certain columns. + /// # Panic + /// Panics iff the projection is not in increasing order (e.g. `[1, 0]` nor `[0, 1, 1]` are valid) + pub fn new_with_projection_info( + reader: R, + metadata: FileMetadata, + projection: Option, + limit: Option, + ) -> Self { Self { reader, metadata, @@ -52,7 +71,7 @@ impl FileReader { pub fn schema(&self) -> &ArrowSchema { self.projection .as_ref() - .map(|x| &x.2) + .map(|x| &x.schema) .unwrap_or(&self.metadata.schema) } @@ -66,9 +85,23 @@ impl FileReader { self.reader } + pub fn set_current_block(&mut self, idx: usize) { + self.current_block = idx; + } + + pub fn get_current_block(&self) -> usize { + self.current_block + } + + /// Get the inner memory scratches so they can be reused in a new writer. + /// This can be utilized to save memory allocations for performance reasons. + pub fn take_projection_info(&mut self) -> Option { + std::mem::take(&mut self.projection) + } + /// Get the inner memory scratches so they can be reused in a new writer. /// This can be utilized to save memory allocations for performance reasons. - pub fn get_scratches(&mut self) -> (Vec, Vec) { + pub fn take_scratches(&mut self) -> (Vec, Vec) { ( std::mem::take(&mut self.data_scratch), std::mem::take(&mut self.message_scratch), @@ -91,6 +124,43 @@ impl FileReader { }; Ok(()) } + + /// Skip over blocks until we have seen at most `offset` rows, returning how many rows we are + /// still too see. + /// + /// This will never go over the `offset`. Meaning that if the `offset < current_block.len()`, + /// the block will not be skipped. 
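+    ///
+    /// A worked example, assuming a hypothetical file that stores three 100-row blocks:
+    ///
+    /// ```ignore
+    /// // reader starts at block 0
+    /// let leftover = reader.skip_blocks_till_limit(250)?;
+    /// assert_eq!(reader.get_current_block(), 2); // blocks 0 and 1 are skipped entirely
+    /// assert_eq!(leftover, 50); // 50 rows still have to be sliced off block 2
+    /// ```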
+ pub fn skip_blocks_till_limit(&mut self, offset: u64) -> PolarsResult { + let mut remaining_offset = offset; + + for (i, block) in self.metadata.blocks.iter().enumerate() { + let message = + get_message_from_block(&mut self.reader, block, &mut self.message_scratch)?; + let record_batch = get_record_batch(message)?; + + let length = record_batch.length()?; + let length = length as u64; + + if length > remaining_offset { + self.current_block = i; + return Ok(remaining_offset); + } + + remaining_offset -= length; + } + + self.current_block = self.metadata.blocks.len(); + Ok(remaining_offset) + } + + pub fn next_record_batch( + &mut self, + ) -> Option>> { + let block = self.metadata.blocks.get(self.current_block)?; + self.current_block += 1; + let message = get_message_from_block(&mut self.reader, block, &mut self.message_scratch); + Some(message.and_then(|m| get_record_batch(m))) + } } impl Iterator for FileReader { @@ -114,7 +184,7 @@ impl Iterator for FileReader { &mut self.reader, self.dictionaries.as_ref().unwrap(), &self.metadata, - self.projection.as_ref().map(|x| x.0.as_ref()), + self.projection.as_ref().map(|x| x.columns.as_ref()), Some(self.remaining), block, &mut self.message_scratch, @@ -122,7 +192,7 @@ impl Iterator for FileReader { ); self.remaining -= chunk.as_ref().map(|x| x.len()).unwrap_or_default(); - let chunk = if let Some((_, map, _)) = &self.projection { + let chunk = if let Some(ProjectionInfo { map, .. }) = &self.projection { // re-order according to projection chunk.map(|chunk| apply_projection(chunk, map)) } else { diff --git a/crates/polars-arrow/src/io/ipc/read/stream.rs b/crates/polars-arrow/src/io/ipc/read/stream.rs index 87241596cdbe..b2cfb727b385 100644 --- a/crates/polars-arrow/src/io/ipc/read/stream.rs +++ b/crates/polars-arrow/src/io/ipc/read/stream.rs @@ -2,7 +2,6 @@ use std::io::Read; use arrow_format::ipc::planus::ReadAsRoot; use polars_error::{polars_bail, polars_err, PolarsError, PolarsResult}; -use polars_utils::aliases::PlHashMap; use super::super::CONTINUATION_MARKER; use super::common::*; @@ -93,7 +92,7 @@ fn read_next( dictionaries: &mut Dictionaries, message_buffer: &mut Vec, data_buffer: &mut Vec, - projection: &Option<(Vec, PlHashMap, ArrowSchema)>, + projection: &Option, scratch: &mut Vec, ) -> PolarsResult> { // determine metadata length @@ -169,7 +168,7 @@ fn read_next( batch, &metadata.schema, &metadata.ipc_schema, - projection.as_ref().map(|x| x.0.as_ref()), + projection.as_ref().map(|x| x.columns.as_ref()), None, dictionaries, metadata.version, @@ -179,7 +178,7 @@ fn read_next( scratch, ); - if let Some((_, map, _)) = projection { + if let Some(ProjectionInfo { map, .. }) = projection { // re-order according to projection chunk .map(|chunk| apply_projection(chunk, map)) @@ -238,7 +237,7 @@ pub struct StreamReader { finished: bool, data_buffer: Vec, message_buffer: Vec, - projection: Option<(Vec, PlHashMap, ArrowSchema)>, + projection: Option, scratch: Vec, } @@ -249,10 +248,8 @@ impl StreamReader { /// encounter a schema. 
/// To check if the reader is done, use `is_finished(self)` pub fn new(reader: R, metadata: StreamMetadata, projection: Option>) -> Self { - let projection = projection.map(|projection| { - let (p, h, schema) = prepare_projection(&metadata.schema, projection); - (p, h, schema) - }); + let projection = + projection.map(|projection| prepare_projection(&metadata.schema, projection)); Self { reader, @@ -275,7 +272,7 @@ impl StreamReader { pub fn schema(&self) -> &ArrowSchema { self.projection .as_ref() - .map(|x| &x.2) + .map(|x| &x.schema) .unwrap_or(&self.metadata.schema) } diff --git a/crates/polars-arrow/src/record_batch.rs b/crates/polars-arrow/src/record_batch.rs index f58d129831f1..2b0b8112ea9e 100644 --- a/crates/polars-arrow/src/record_batch.rs +++ b/crates/polars-arrow/src/record_batch.rs @@ -9,7 +9,7 @@ use crate::array::{Array, ArrayRef}; /// the same length, [`RecordBatchT::len`]. #[derive(Debug, Clone, PartialEq, Eq)] pub struct RecordBatchT> { - length: usize, + height: usize, arrays: Vec, } @@ -29,14 +29,14 @@ impl> RecordBatchT { /// /// # Error /// - /// I.f.f. the length does not match the length of any of the arrays - pub fn try_new(length: usize, arrays: Vec) -> PolarsResult { + /// I.f.f. the height does not match the length of any of the arrays + pub fn try_new(height: usize, arrays: Vec) -> PolarsResult { polars_ensure!( - arrays.iter().all(|arr| arr.as_ref().len() == length), + arrays.iter().all(|arr| arr.as_ref().len() == height), ComputeError: "RecordBatch requires all its arrays to have an equal number of rows", ); - Ok(Self { length, arrays }) + Ok(Self { height, arrays }) } /// returns the [`Array`]s in [`RecordBatchT`] @@ -51,7 +51,17 @@ impl> RecordBatchT { /// returns the number of rows of every array pub fn len(&self) -> usize { - self.length + self.height + } + + /// returns the number of rows of every array + pub fn height(&self) -> usize { + self.height + } + + /// returns the number of arrays + pub fn width(&self) -> usize { + self.arrays.len() } /// returns whether the columns have any rows diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 7e2d7b050dcf..0d8fef7f4c4a 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -3,6 +3,7 @@ use std::borrow::Cow; use std::{mem, ops}; +use polars_row::ArrayRef; use polars_utils::itertools::Itertools; use rayon::prelude::*; @@ -3334,6 +3335,31 @@ impl DataFrame { pub(crate) fn infer_height(cols: &[Column]) -> usize { cols.first().map_or(0, Column::len) } + + pub fn append_record_batch(&mut self, rb: RecordBatchT) -> PolarsResult<()> { + polars_ensure!( + rb.arrays().len() == self.width(), + InvalidOperation: "attempt to extend dataframe of width {} with record batch of width {}", + self.width(), + rb.arrays().len(), + ); + + if rb.height() == 0 { + return Ok(()); + } + + // SAFETY: + // - we don't adjust the names of the columns + // - each column gets appended the same number of rows, which is an invariant of + // record_batch. 
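+        // Each incoming arrow array is converted into a Series below and appended to the
+        // column at the same index, so the frame keeps its width and every column grows by
+        // `rb.height()` rows.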
+ let columns = unsafe { self.get_columns_mut() }; + for (col, arr) in columns.iter_mut().zip(rb.into_arrays()) { + let arr_series = Series::from_arrow_chunks(PlSmallStr::EMPTY, vec![arr])?.into_column(); + col.append(&arr_series)?; + } + + Ok(()) + } } pub struct RecordBatchIter<'a> { diff --git a/crates/polars-core/src/frame/upstream_traits.rs b/crates/polars-core/src/frame/upstream_traits.rs index 38b346ace652..1392f87c052f 100644 --- a/crates/polars-core/src/frame/upstream_traits.rs +++ b/crates/polars-core/src/frame/upstream_traits.rs @@ -1,5 +1,7 @@ use std::ops::{Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}; +use arrow::record_batch::RecordBatchT; + use crate::prelude::*; impl FromIterator for DataFrame { @@ -22,6 +24,32 @@ impl FromIterator for DataFrame { } } +impl TryExtend>> for DataFrame { + fn try_extend>>>( + &mut self, + iter: I, + ) -> PolarsResult<()> { + for record_batch in iter { + self.append_record_batch(record_batch)?; + } + + Ok(()) + } +} + +impl TryExtend>>> for DataFrame { + fn try_extend>>>>( + &mut self, + iter: I, + ) -> PolarsResult<()> { + for record_batch in iter { + self.append_record_batch(record_batch?)?; + } + + Ok(()) + } +} + impl Index for DataFrame { type Output = Column; diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs index 3af8671dadd1..c104c2ea8573 100644 --- a/crates/polars-core/src/scalar/from.rs +++ b/crates/polars-core/src/scalar/from.rs @@ -1,3 +1,5 @@ +use polars_utils::pl_str::PlSmallStr; + use super::{AnyValue, DataType, Scalar}; macro_rules! impl_from { @@ -25,4 +27,5 @@ impl_from! { (u64, UInt64, UInt64) (f32, Float32, Float32) (f64, Float64, Float64) + (PlSmallStr, StringOwned, String) } diff --git a/crates/polars-io/src/utils/other.rs b/crates/polars-io/src/utils/other.rs index 4e039124933f..f4ef629821a9 100644 --- a/crates/polars-io/src/utils/other.rs +++ b/crates/polars-io/src/utils/other.rs @@ -45,7 +45,7 @@ pub fn get_reader_bytes( feature = "parquet", feature = "avro" ))] -pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema { +pub fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> ArrowSchema { projection .iter() .map(|idx| schema.get_at_index(*idx).unwrap()) @@ -59,14 +59,14 @@ pub(crate) fn apply_projection(schema: &ArrowSchema, projection: &[usize]) -> Ar feature = "avro", feature = "parquet" ))] -pub(crate) fn columns_to_projection( - columns: &[String], +pub fn columns_to_projection>( + columns: &[T], schema: &ArrowSchema, ) -> PolarsResult> { let mut prj = Vec::with_capacity(columns.len()); for column in columns { - let i = schema.try_index_of(column)?; + let i = schema.try_index_of(column.as_ref())?; prj.push(i); } diff --git a/crates/polars-plan/src/plans/optimizer/mod.rs b/crates/polars-plan/src/plans/optimizer/mod.rs index 70880ca78359..dc0d330d8b86 100644 --- a/crates/polars-plan/src/plans/optimizer/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/mod.rs @@ -89,6 +89,7 @@ pub fn optimize( let simplify_expr = opt_state.contains(OptFlags::SIMPLIFY_EXPR); let slice_pushdown = opt_state.contains(OptFlags::SLICE_PUSHDOWN); let streaming = opt_state.contains(OptFlags::STREAMING); + let new_streaming = opt_state.contains(OptFlags::NEW_STREAMING); let fast_projection = opt_state.contains(OptFlags::FAST_PROJECTION); // Don't run optimizations that don't make sense on a single node. 
@@ -181,7 +182,7 @@ pub fn optimize( } if slice_pushdown { - let slice_pushdown_opt = SlicePushDown::new(streaming); + let slice_pushdown_opt = SlicePushDown::new(streaming, new_streaming); let alp = lp_arena.take(lp_top); let alp = slice_pushdown_opt.optimize(alp, lp_arena, expr_arena)?; diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index 9c2f8497fac8..a5ff806abae9 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -5,6 +5,7 @@ use crate::prelude::*; pub(super) struct SlicePushDown { streaming: bool, + new_streaming: bool, pub scratch: Vec, } @@ -59,9 +60,10 @@ fn can_pushdown_slice_past_projections(exprs: &[ExprIR], arena: &Arena) - } impl SlicePushDown { - pub(super) fn new(streaming: bool) -> Self { + pub(super) fn new(streaming: bool, new_streaming: bool) -> Self { Self { streaming, + new_streaming, scratch: vec![], } } @@ -211,6 +213,32 @@ impl SlicePushDown { Ok(lp) }, + + #[cfg(feature = "ipc")] + (Scan { + sources, + file_info, + hive_parts, + output_schema, + mut file_options, + predicate, + scan_type: scan_type @ FileScan::Ipc { .. }, + }, Some(state)) if self.new_streaming && predicate.is_none() => { + file_options.slice = Some((state.offset, state.len as usize)); + + let lp = Scan { + sources, + file_info, + hive_parts, + output_schema, + scan_type, + file_options, + predicate, + }; + + Ok(lp) + }, + // TODO! we currently skip slice pushdown if there is a predicate. (Scan { sources, diff --git a/crates/polars-stream/src/nodes/io_sources/ipc.rs b/crates/polars-stream/src/nodes/io_sources/ipc.rs new file mode 100644 index 000000000000..3a83c8e3132c --- /dev/null +++ b/crates/polars-stream/src/nodes/io_sources/ipc.rs @@ -0,0 +1,557 @@ +use std::cmp::Reverse; +use std::io::Cursor; +use std::ops::Range; +use std::sync::Arc; + +use polars_core::config; +use polars_core::frame::DataFrame; +use polars_core::prelude::{Column, DataType}; +use polars_core::scalar::Scalar; +use polars_core::utils::arrow::array::TryExtend; +use polars_core::utils::arrow::io::ipc::read::{ + prepare_projection, read_file_metadata, FileMetadata, FileReader, ProjectionInfo, +}; +use polars_error::{ErrString, PolarsError, PolarsResult}; +use polars_expr::prelude::PhysicalExpr; +use polars_expr::state::ExecutionState; +use polars_io::cloud::CloudOptions; +use polars_io::ipc::IpcScanOptions; +use polars_io::utils::columns_to_projection; +use polars_io::RowIndex; +use polars_plan::plans::hive::HivePartitions; +use polars_plan::plans::{FileInfo, ScanSources}; +use polars_plan::prelude::FileScanOptions; +use polars_utils::mmap::MemSlice; +use polars_utils::pl_str::PlSmallStr; +use polars_utils::priority::Priority; +use polars_utils::IdxSize; + +use crate::async_primitives::distributor_channel::distributor_channel; +use crate::async_primitives::linearizer::Linearizer; +use crate::morsel::{get_ideal_morsel_size, SourceToken}; +use crate::nodes::{ + ComputeNode, JoinHandle, Morsel, MorselSeq, PortState, TaskPriority, TaskScope, +}; +use crate::pipe::{RecvPort, SendPort}; +use crate::{DEFAULT_DISTRIBUTOR_BUFFER_SIZE, DEFAULT_LINEARIZER_BUFFER_SIZE}; + +const ROW_COUNT_OVERFLOW_ERR: PolarsError = PolarsError::ComputeError(ErrString::new_static( + "\ +IPC file produces more than 2^32 rows; \ +consider compiling with polars-bigidx feature (polars-u64-idx package on python)", +)); + +pub struct IpcSourceNode { + sources: ScanSources, + + config: 
IpcSourceNodeConfig, + num_pipelines: usize, + + /// Every phase we need to be able to continue from where we left off, so we save the state of + /// the Walker task. + state: IpcSourceNodeState, +} + +pub struct IpcSourceNodeConfig { + row_index: Option, + projection_info: Option, + + rechunk: bool, + include_file_paths: Option, + + first_metadata: FileMetadata, +} + +pub struct IpcSourceNodeState { + morsel_seq: u64, + row_idx_offset: IdxSize, + + slice: Range, + + source_idx: usize, + source: Option, +} + +pub struct Source { + file_path: Option>, + + memslice: Arc, + metadata: Arc, + + block_offset: usize, +} + +impl IpcSourceNode { + #[allow(clippy::too_many_arguments)] + pub fn new( + sources: ScanSources, + _file_info: FileInfo, + _hive_parts: Option>>, // @TODO + predicate: Option>, + options: IpcScanOptions, + _cloud_options: Option, + file_options: FileScanOptions, + mut first_metadata: Option, + ) -> PolarsResult { + // These should have all been removed during lower_ir + assert!(predicate.is_none()); + assert!(!sources.is_empty()); + + let IpcScanOptions = options; + + let FileScanOptions { + slice, + with_columns, + cache: _, // @TODO + row_index, + rechunk, + file_counter: _, // @TODO + hive_options: _, // @TODO + glob: _, // @TODO + include_file_paths, + allow_missing_columns: _, // @TODO + } = file_options; + + let first_metadata = match first_metadata.take() { + Some(md) => md, + None => { + let source = sources.iter().next().unwrap(); + let source = source.to_memslice()?; + read_file_metadata(&mut std::io::Cursor::new(&*source))? + }, + }; + + let projection = with_columns + .as_ref() + .map(|cols| columns_to_projection(cols, &first_metadata.schema)) + .transpose()?; + let projection_info = projection + .as_ref() + .map(|p| prepare_projection(&first_metadata.schema, p.clone())); + + let state = IpcSourceNodeState { + morsel_seq: 0, + row_idx_offset: row_index.as_ref().map_or(0, |ri| ri.offset), + + // Always create a slice. If no slice was given, just make the biggest slice possible. + slice: slice.map_or(0..usize::MAX, |(offset, length)| { + let offset = offset as usize; + offset..offset + length + }), + + source_idx: 0, + source: None, + }; + + Ok(IpcSourceNode { + sources, + + config: IpcSourceNodeConfig { + row_index, + projection_info, + + rechunk, + include_file_paths, + + first_metadata, + }, + + num_pipelines: 0, + + state, + }) + } +} + +/// Move `slice` forward by `n` and return the slice until then. 
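+///
+/// A small worked sketch (hypothetical values; `n` is the number of rows in the block
+/// being consumed):
+///
+/// ```ignore
+/// let mut slice = 3..10;
+/// let taken = slice_take(&mut slice, 5);
+/// assert_eq!(taken, 3..5); // rows of this block that fall inside the slice
+/// assert_eq!(slice, 0..5); // remaining slice, relative to the next block
+/// ```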
+fn slice_take(slice: &mut Range, n: usize) -> Range { + let offset = slice.start; + let length = slice.len(); + + assert!(offset < n); + + let chunk_length = (n - offset).min(length); + let rng = offset..offset + chunk_length; + *slice = 0..length - chunk_length; + + rng +} + +fn get_max_morsel_size() -> usize { + std::env::var("POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE") + .map_or_else( + |_| get_ideal_morsel_size(), + |v| { + v.parse::().expect( + "POLARS_STREAMING_IPC_SOURCE_MAX_MORSEL_SIZE does not contain valid size", + ) + }, + ) + .max(1) +} + +impl ComputeNode for IpcSourceNode { + fn name(&self) -> &str { + "ipc_source" + } + + fn initialize(&mut self, num_pipelines: usize) { + self.num_pipelines = num_pipelines; + } + + fn update_state(&mut self, recv: &mut [PortState], send: &mut [PortState]) -> PolarsResult<()> { + assert!(recv.is_empty()); + assert_eq!(send.len(), 1); + + if self.state.slice.is_empty() || self.state.source_idx >= self.sources.len() { + send[0] = PortState::Done; + } + + if send[0] != PortState::Done { + send[0] = PortState::Ready; + } + + Ok(()) + } + + fn spawn<'env, 's>( + &'env mut self, + scope: &'s TaskScope<'s, 'env>, + recv_ports: &mut [Option>], + send_ports: &mut [Option>], + _state: &'s ExecutionState, + join_handles: &mut Vec>>, + ) { + assert!(recv_ports.is_empty()); + assert_eq!(send_ports.len(), 1); + + // Split size for morsels. + let max_morsel_size = get_max_morsel_size(); + let source_token = SourceToken::new(); + + let num_pipelines = self.num_pipelines; + let config = &self.config; + let sources = &self.sources; + let state = &mut self.state; + + /// Messages sent from Walker task to Decoder tasks. + struct BatchMessage { + memslice: Arc, + metadata: Arc, + file_path: Option>, + row_idx_offset: IdxSize, + slice: Range, + block_range: Range, + morsel_seq_base: u64, + } + + // Walker task -> Decoder tasks. + let (mut batch_tx, batch_rxs) = + distributor_channel::(num_pipelines, DEFAULT_DISTRIBUTOR_BUFFER_SIZE); + // Decoder tasks -> Distributor task. + let (mut decoded_rx, decoded_tx) = Linearizer::, Morsel>>::new( + num_pipelines, + DEFAULT_LINEARIZER_BUFFER_SIZE, + ); + // Distributor task -> output. + let mut sender = send_ports[0].take().unwrap().serial(); + + // Distributor task. + // + // Shuffles morsels from `n` producers amongst `n` consumers. + // + // If record batches in the source IPC file are large, one decoder might produce many + // morsels at the same time. At the same time, other decoders might not produce anything. + // Therefore, we would like to distribute the output of a single decoder task over the + // available output pipelines. + join_handles.push(scope.spawn_task(TaskPriority::High, async move { + while let Some(morsel) = decoded_rx.get().await { + if sender.send(morsel.1).await.is_err() { + break; + } + } + PolarsResult::Ok(()) + })); + + // Decoder tasks. + // + // Tasks a IPC file and certain number of blocks and decodes each block as a record batch. + // Then, all record batches are concatenated into a DataFrame. If the resulting DataFrame + // is too large, which happens when we have one very large block, the DataFrame is split + // into smaller pieces an spread among the pipelines. + let decoder_tasks = decoded_tx.into_iter().zip(batch_rxs) + .map(|(mut send, mut rx)| { + let source_token = source_token.clone(); + scope.spawn_task(TaskPriority::Low, async move { + // Amortize allocations. 
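+                // The scratch buffers and the projection info are moved into every
+                // `FileReader` and taken back after each batch, so they are reused
+                // instead of being reallocated per record batch.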
+ let mut data_scratch = Vec::new(); + let mut message_scratch = Vec::new(); + let mut projection_info = config.projection_info.clone(); + + let schema = projection_info.as_ref().map_or(config.first_metadata.schema.as_ref(), |ProjectionInfo { schema, .. }| schema); + let pl_schema = schema + .iter() + .map(|(n, f)| (n.clone(), DataType::from_arrow(&f.dtype, true))) + .collect(); + + while let Ok(m) = rx.recv().await { + let BatchMessage { + memslice: source, + metadata, + file_path, + row_idx_offset, + slice, + morsel_seq_base, + block_range, + } = m; + + let mut reader = FileReader::new_with_projection_info( + Cursor::new(source.as_ref()), + metadata.as_ref().clone(), + std::mem::take(&mut projection_info), + None, + ); + reader.set_current_block(block_range.start); + reader.set_scratches(( + std::mem::take(&mut data_scratch), + std::mem::take(&mut message_scratch), + )); + + // Create the DataFrame with the appropriate schema and append all the record + // batches to it. This will perform schema validation as well. + let mut df = DataFrame::empty_with_schema(&pl_schema); + df.try_extend(reader.by_ref().take(block_range.len()))?; + + df = df.slice(slice.start as i64, slice.len()); + + if config.rechunk { + df.rechunk_mut(); + } + + if let Some(RowIndex { name, offset: _ }) = &config.row_index { + let offset = row_idx_offset + slice.start as IdxSize; + df = df.with_row_index(name.clone(), Some(offset))?; + } + + if let Some(col) = config.include_file_paths.as_ref() { + let file_path = file_path.unwrap(); + let file_path = Scalar::from(PlSmallStr::from(file_path.as_ref())); + df.with_column(Column::new_scalar( + col.clone(), + file_path, + df.height(), + ))?; + } + + // If the block is very large, we want to split the block amongst the + // pipelines. That will at least allow some parallelism. + if df.height() > max_morsel_size && config::verbose() { + eprintln!("IPC source encountered a (too) large record batch of {} rows. Splitting and continuing.", df.height()); + } + for i in 0..df.height().div_ceil(max_morsel_size) { + let morsel = df.slice((i * max_morsel_size) as i64, max_morsel_size); + let seq = MorselSeq::new(morsel_seq_base + i as u64); + let morsel = Morsel::new( + morsel, + seq, + source_token.clone(), + ); + if send.insert(Priority(Reverse(seq), morsel)).await.is_err() { + break; + } + } + + (data_scratch, message_scratch) = reader.take_scratches(); + projection_info = reader.take_projection_info(); + } + + PolarsResult::Ok(()) + }) + }) + .collect::>(); + + // Walker task. + // + // Walks all the sources and supplies block ranges to the decoder tasks. + join_handles.push(scope.spawn_task(TaskPriority::Low, async move { + struct Batch { + row_idx_offset: IdxSize, + block_start: usize, + num_rows: usize, + } + + // Batch completion parameters + let batch_size_limit = get_ideal_morsel_size(); + let sliced_batch_size_limit = state.slice.len().div_ceil(num_pipelines); + let batch_block_limit = if sources.len() >= num_pipelines { + // If there are more files than decoder tasks, try to subdivide the files instead + // of the blocks. 
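+                // A block limit of usize::MAX means batches here are only cut by the
+                // row-count limits below.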
+ usize::MAX + } else { + config.first_metadata.blocks.len().div_ceil(num_pipelines) + }; + + // Amortize allocations + let mut data_scratch = Vec::new(); + let mut message_scratch = Vec::new(); + let mut projection_info = config.projection_info.clone(); + + 'source_loop: while !state.slice.is_empty() { + let source = match state.source { + Some(ref mut source) => source, + None => { + let Some(source) = sources.get(state.source_idx) else { + break; + }; + + let file_path: Option> = config + .include_file_paths + .as_ref() + .map(|_| source.to_include_path_name().into()); + let memslice = source.to_memslice()?; + let metadata = if state.source_idx == 0 { + config.first_metadata.clone() + } else { + read_file_metadata(&mut std::io::Cursor::new(memslice.as_ref()))? + }; + + state.source.insert(Source { + file_path, + memslice: Arc::new(memslice), + metadata: Arc::new(metadata), + block_offset: 0, + }) + }, + }; + + let mut reader = FileReader::new_with_projection_info( + Cursor::new(source.memslice.as_ref()), + source.metadata.as_ref().clone(), + std::mem::take(&mut projection_info), + None, + ); + reader.set_current_block(source.block_offset); + reader.set_scratches(( + std::mem::take(&mut data_scratch), + std::mem::take(&mut message_scratch), + )); + + if state.slice.start > 0 { + // Skip over all blocks that the slice would skip anyway. + let new_offset = reader.skip_blocks_till_limit(state.slice.start as u64)?; + + state.row_idx_offset += (state.slice.start as u64 - new_offset) as IdxSize; + state.slice = new_offset as usize..new_offset as usize + state.slice.len(); + + // If we skip the entire file. Don't even try to read from it. + if reader.get_current_block() == reader.metadata().blocks.len() { + (data_scratch, message_scratch) = reader.take_scratches(); + projection_info = reader.take_projection_info(); + state.source.take(); + state.source_idx += 1; + continue; + } + } + + let mut batch = Batch { + row_idx_offset: state.row_idx_offset, + block_start: reader.get_current_block(), + num_rows: 0, + }; + + // We don't yet want to commit these values to the state in case this batch gets + // cancelled. + let mut uncommitted_slice = state.slice.clone(); + let mut uncommitted_row_idx_offset = state.row_idx_offset; + while !state.slice.is_empty() { + let mut is_batch_complete = false; + + match reader.next_record_batch() { + None if batch.num_rows == 0 => break, + + // If we have no more record batches available, we want to send what is + // left. + None => is_batch_complete = true, + Some(record_batch) => { + let rb_num_rows = record_batch?.length()? as usize; + batch.num_rows += rb_num_rows; + + // We need to ensure that we are not overflowing the IdxSize maximum + // capacity. + let rb_num_rows = IdxSize::try_from(rb_num_rows) + .map_err(|_| ROW_COUNT_OVERFLOW_ERR)?; + uncommitted_row_idx_offset = uncommitted_row_idx_offset + .checked_add(rb_num_rows) + .ok_or(ROW_COUNT_OVERFLOW_ERR)?; + }, + } + + let current_block = reader.get_current_block(); + + // Subdivide into batches for large files. + is_batch_complete |= batch.num_rows >= batch_size_limit; + // Subdivide into batches if the file is sliced. + is_batch_complete |= batch.num_rows >= sliced_batch_size_limit; + // Subdivide into batches for small files. + is_batch_complete |= current_block - batch.block_start >= batch_block_limit; + + // Batch blocks such that we send appropriately sized morsels. We guarantee a + // lower bound here, but not an upper bound. 
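+                    // The upper bound is enforced by the decoder tasks, which split any
+                    // oversized batch into morsels of at most `max_morsel_size` rows.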
+ if is_batch_complete { + let batch_slice = slice_take(&mut uncommitted_slice, batch.num_rows); + let batch_slice_len = batch_slice.len(); + let block_range = batch.block_start..current_block; + + let message = BatchMessage { + memslice: source.memslice.clone(), + metadata: source.metadata.clone(), + file_path: source.file_path.clone(), + row_idx_offset: batch.row_idx_offset, + slice: batch_slice, + morsel_seq_base: state.morsel_seq, + block_range, + }; + + if source_token.stop_requested() { + break 'source_loop; + } + + if batch_tx.send(message).await.is_err() { + // This should only happen if the receiver of the decoder + // has broken off, meaning no further input will be needed. + break 'source_loop; + } + + // Commit the changes to the state. + // Now, we know that the a decoder will process it. + // + // This might generate several morsels if the record batch is very large. + state.morsel_seq += batch_slice_len.div_ceil(max_morsel_size) as u64; + state.slice = uncommitted_slice.clone(); + state.row_idx_offset = uncommitted_row_idx_offset; + source.block_offset = current_block; + + batch = Batch { + row_idx_offset: state.row_idx_offset, + block_start: current_block, + num_rows: 0, + }; + } + } + + (data_scratch, message_scratch) = reader.take_scratches(); + projection_info = reader.take_projection_info(); + + state.source.take(); + state.source_idx += 1; + } + + drop(batch_tx); // Inform decoder tasks to stop. + for decoder_task in decoder_tasks { + decoder_task.await?; + } + + PolarsResult::Ok(()) + })); + } +} diff --git a/crates/polars-stream/src/nodes/io_sources/mod.rs b/crates/polars-stream/src/nodes/io_sources/mod.rs new file mode 100644 index 000000000000..ce14ad3b0f7a --- /dev/null +++ b/crates/polars-stream/src/nodes/io_sources/mod.rs @@ -0,0 +1 @@ +pub mod ipc; diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs index 936c0ceb3ada..effebe67c34b 100644 --- a/crates/polars-stream/src/nodes/mod.rs +++ b/crates/polars-stream/src/nodes/mod.rs @@ -5,6 +5,7 @@ pub mod in_memory_sink; pub mod in_memory_source; pub mod input_independent_select; pub mod io_sinks; +pub mod io_sources; pub mod joins; pub mod map; pub mod multiplexer; diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs index 95e3ae72224d..063c94081dbc 100644 --- a/crates/polars-stream/src/physical_plan/lower_ir.rs +++ b/crates/polars-stream/src/physical_plan/lower_ir.rs @@ -1,10 +1,11 @@ use std::sync::Arc; +use polars_core::frame::DataFrame; use polars_core::prelude::{InitHashMaps, PlHashMap, PlIndexMap}; use polars_core::schema::Schema; use polars_error::{polars_ensure, PolarsResult}; use polars_plan::plans::expr_ir::{ExprIR, OutputName}; -use polars_plan::plans::{AExpr, FunctionIR, IRAggExpr, IR}; +use polars_plan::plans::{AExpr, FileScan, FunctionIR, IRAggExpr, IR}; use polars_plan::prelude::{FileType, SinkType}; use polars_utils::arena::{Arena, Node}; use polars_utils::itertools::Itertools; @@ -314,23 +315,67 @@ pub fn lower_ir( sources: scan_sources, file_info, hive_parts, - output_schema, + output_schema: scan_output_schema, scan_type, - predicate, + mut predicate, file_options, } = v.clone() else { unreachable!(); }; - PhysNodeKind::FileScan { - scan_sources, - file_info, - hive_parts, - output_schema, - scan_type, - predicate, - file_options, + if scan_sources.is_empty() { + // If there are no sources, just provide an empty in-memory source with the right + // schema. 
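+                // (The streaming IPC source node asserts that it receives at least one
+                // source, so an empty scan has to short-circuit here.)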
+ PhysNodeKind::InMemorySource { + df: Arc::new(DataFrame::empty_with_schema(output_schema.as_ref())), + } + } else { + if matches!(scan_type, FileScan::Ipc { .. }) { + // @TODO: All the things the IPC source does not support yet. + if hive_parts.is_some() + || scan_sources.is_cloud_url() + || file_options.allow_missing_columns + || file_options.slice.is_some_and(|(offset, _)| offset < 0) + { + todo!(); + } + } + + // If the node itself would just filter on the whole output then there is no real + // reason to do it in the source node itself. + let do_filter_in_separate_node = + predicate.is_some() && matches!(scan_type, FileScan::Ipc { .. }); + + if do_filter_in_separate_node { + assert!(file_options.slice.is_none()); // Invariant of the scan + let predicate = predicate.take().unwrap(); + + let input = phys_sm.insert(PhysNode::new( + output_schema.clone(), + PhysNodeKind::FileScan { + scan_sources, + file_info, + hive_parts, + output_schema: scan_output_schema, + scan_type, + predicate: None, + file_options, + }, + )); + + PhysNodeKind::Filter { input, predicate } + } else { + PhysNodeKind::FileScan { + scan_sources, + file_info, + hive_parts, + output_schema: scan_output_schema, + scan_type, + predicate, + file_options, + } + } } }, diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs index befa9c3a93b9..b701696972a9 100644 --- a/crates/polars-stream/src/physical_plan/to_graph.rs +++ b/crates/polars-stream/src/physical_plan/to_graph.rs @@ -367,6 +367,23 @@ fn to_graph_rec<'a>( todo!() } }, + FileScan::Ipc { + options, + cloud_options, + metadata: first_metadata, + } => ctx.graph.add_node( + nodes::io_sources::ipc::IpcSourceNode::new( + scan_sources, + file_info, + hive_parts, + predicate, + options, + cloud_options, + file_options, + first_metadata, + )?, + [], + ), _ => todo!(), } } diff --git a/py-polars/tests/unit/io/test_lazy_ipc.py b/py-polars/tests/unit/io/test_lazy_ipc.py index 0d67b6b06f89..ec75d495ce8d 100644 --- a/py-polars/tests/unit/io/test_lazy_ipc.py +++ b/py-polars/tests/unit/io/test_lazy_ipc.py @@ -88,6 +88,7 @@ def test_ipc_list_arg(io_files_path: Path) -> None: assert df.row(0) == ("vegetables", 45, 0.5, 2) +@pytest.mark.may_fail_auto_streaming def test_scan_ipc_local_with_async( capfd: Any, monkeypatch: Any,