Skip to content

Commit

Permalink
Deploying to gh-pages from @ 78460fe 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 1, 2023
1 parent 87597ba commit 74f2bcf
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 54 deletions.

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polars/chunked_array/object/struct.StructChunked.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polars/datatypes/struct.StructChunked.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polars/prelude/datatypes/struct.StructChunked.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polars/prelude/struct.StructChunked.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polars_core/datatypes/struct.StructChunked.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions search-index.js

Large diffs are not rendered by default.

80 changes: 40 additions & 40 deletions src/polars_core/chunked_array/logical/struct_/mod.rs.html
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@

<span class="kw">use </span>std::collections::BTreeMap;
<span class="kw">use </span>std::io::Write;
<span class="kw">use </span>std::ops::BitAnd;
<span class="kw">use </span>std::ops::BitOr;

<span class="kw">use </span>arrow::bitmap::MutableBitmap;
<span class="kw">use </span>arrow::offset::OffsetsBuffer;
Expand All @@ -491,6 +491,7 @@
field: Field,
chunks: Vec&lt;ArrayRef&gt;,
null_count: usize,
total_null_count: usize,
}

<span class="kw">fn </span>arrays_to_fields(field_arrays: <span class="kw-2">&amp;</span>[ArrayRef], fields: <span class="kw-2">&amp;</span>[Series]) -&gt; Vec&lt;ArrowField&gt; {
Expand Down Expand Up @@ -532,6 +533,9 @@
<span class="kw">pub fn </span>null_count(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.null_count
}
<span class="kw">pub fn </span>total_null_count(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.total_null_count
}
<span class="kw">pub fn </span>new(name: <span class="kw-2">&amp;</span>str, fields: <span class="kw-2">&amp;</span>[Series]) -&gt; PolarsResult&lt;<span class="self">Self</span>&gt; {
<span class="kw">let </span><span class="kw-2">mut </span>names = PlHashSet::with_capacity(fields.len());
<span class="kw">let </span>first_len = fields.get(<span class="number">0</span>).map(|s| s.len()).unwrap_or(<span class="number">0</span>);
Expand Down Expand Up @@ -638,58 +642,54 @@
field,
chunks: <span class="macro">vec!</span>[arrow_array],
null_count: <span class="number">0</span>,
total_null_count: <span class="number">0</span>,
};
out.set_null_count();
out
}

<span class="kw">fn </span>set_null_count(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span><span class="kw-2">mut </span>null_count = <span class="number">0</span>;
<span class="kw">let </span>chunks_lens = <span class="self">self</span>.fields()[<span class="number">0</span>].chunks().len();

<span class="comment">// fast path
// we early return if a column doesn&#39;t have nulls
</span><span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..chunks_lens {
<span class="kw">for </span>s <span class="kw">in </span><span class="self">self</span>.fields() {
<span class="kw">let </span>arr = <span class="kw-2">&amp;</span>s.chunks()[i];
<span class="kw">let </span>has_nulls = arr.null_count() &gt; <span class="number">0 </span>|| <span class="macro">matches!</span>(s.dtype(), DataType::Null);
<span class="kw">if </span>!has_nulls {
<span class="self">self</span>.null_count = <span class="number">0</span>;
<span class="kw">return</span>;
}
}
<span class="comment">// Count both the total number of nulls and the rows where everything is null
</span>(<span class="self">self</span>.null_count, <span class="self">self</span>.total_null_count) = (<span class="number">0</span>, <span class="number">0</span>);

<span class="comment">// If there is at least one field with no null values, no rows are null. However, we still
// have to count the number of nulls per field to get the total number. Fortunately this is
// cheap since null counts per chunk are pre-computed.
</span><span class="kw">let </span>(could_have_null_rows, total_null_count) =
<span class="self">self</span>.fields().iter().fold((<span class="bool-val">true</span>, <span class="number">0</span>), |acc, s| {
(acc.<span class="number">0 </span>&amp; (s.null_count() != <span class="number">0</span>), acc.<span class="number">1 </span>+ s.null_count())
});
<span class="self">self</span>.total_null_count = total_null_count;
<span class="kw">if </span>!could_have_null_rows {
<span class="kw">return</span>;
}

<span class="comment">// slow path
// we bitand every null validity bitmask to determine
// in which rows all values are null
</span><span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..chunks_lens {
<span class="kw">let </span><span class="kw-2">mut </span>validity_agg = <span class="prelude-val">None</span>;

<span class="kw">let </span><span class="kw-2">mut </span>all_null_array = <span class="bool-val">true</span>;
<span class="comment">// A row is null if all values in it are null, so we bitor every validity bitmask since a
// single valid entry makes that row not null. We can also save some work by not bothering
// to bitor fields that would have all 0 validities (Null dtype or everything null).
</span><span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..<span class="self">self</span>.fields()[<span class="number">0</span>].chunks().len() {
<span class="kw">let </span><span class="kw-2">mut </span>validity_agg: <span class="prelude-ty">Option</span>&lt;arrow::bitmap::Bitmap&gt; = <span class="prelude-val">None</span>;
<span class="kw">let </span><span class="kw-2">mut </span>n_nulls = <span class="prelude-val">None</span>;
<span class="kw">for </span>s <span class="kw">in </span><span class="self">self</span>.fields() {
<span class="kw">let </span>arr = <span class="kw-2">&amp;</span>s.chunks()[i];

<span class="kw">if </span>!<span class="macro">matches!</span>(s.dtype(), DataType::Null) {
all_null_array = <span class="bool-val">false</span>;
<span class="kw">match </span>(<span class="kw-2">&amp;</span>validity_agg, arr.validity()) {
(<span class="prelude-val">Some</span>(agg), <span class="prelude-val">Some</span>(validity)) =&gt; validity_agg = <span class="prelude-val">Some</span>(validity.bitand(agg)),
(<span class="prelude-val">None</span>, <span class="prelude-val">Some</span>(validity)) =&gt; validity_agg = <span class="prelude-val">Some</span>(validity.clone()),
<span class="kw">_ </span>=&gt; {}
<span class="kw">if </span>s.dtype() == <span class="kw-2">&amp;</span>DataType::Null {
<span class="comment">// The implicit validity mask is all 0 so it wouldn&#39;t affect the bitor
</span><span class="kw">continue</span>;
}
<span class="kw">match </span>(arr.validity(), n_nulls, arr.null_count() == <span class="number">0</span>) {
<span class="comment">// The null count is to avoid touching chunks with a validity mask but no nulls
</span>(<span class="kw">_</span>, <span class="prelude-val">Some</span>(<span class="number">0</span>), <span class="kw">_</span>) =&gt; <span class="kw">break</span>, <span class="comment">// No all-null rows, next chunk!
</span>(<span class="prelude-val">None</span>, <span class="kw">_</span>, <span class="kw">_</span>) | (<span class="kw">_</span>, <span class="kw">_</span>, <span class="bool-val">true</span>) =&gt; n_nulls = <span class="prelude-val">Some</span>(<span class="number">0</span>),
(<span class="prelude-val">Some</span>(v), <span class="kw">_</span>, <span class="kw">_</span>) =&gt; {
validity_agg =
validity_agg.map_or_else(|| <span class="prelude-val">Some</span>(v.clone()), |agg| <span class="prelude-val">Some</span>(v.bitor(<span class="kw-2">&amp;</span>agg)));
<span class="comment">// n.b. This is &quot;free&quot; since any bitops trigger a count.
</span>n_nulls = validity_agg.as_ref().map(|v| v.unset_bits());
}
}
}
<span class="comment">// we add the null count
</span><span class="kw">if let </span><span class="prelude-val">Some</span>(validity) = <span class="kw-2">&amp;</span>validity_agg {
null_count += validity.unset_bits()
}
<span class="comment">// all arrays are null arrays
// we add the length of the chunk to the null_count
</span><span class="kw">else if </span>all_null_array {
null_count += <span class="self">self</span>.fields()[<span class="number">0</span>].chunks()[i].len()
}
<span class="comment">// If it&#39;s none, every array was either Null-type or all null
</span><span class="self">self</span>.null_count += n_nulls.unwrap_or(<span class="self">self</span>.fields()[<span class="number">0</span>].chunks()[i].len());
}
<span class="self">self</span>.null_count = null_count
}

<span class="doccomment">/// Get access to one of this `[StructChunked]`&#39;s fields
Expand Down

0 comments on commit 74f2bcf

Please sign in to comment.