diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index e8e08906b84d..ac3842f0c2e5 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -418,18 +418,31 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { /// Return the first n characters fn str_head(&self, n: i64) -> PolarsResult { let ca = self.as_utf8(); - + // if n is negative, we return all but the last abs(n) characters let chunks = if n < 0 { - let n = n.abs() as u64; - ca - .downcast_iter() - .map(|c| substring(c, 0, &Some(c.len() as u64 - n))) - .collect::>()? + let abs_n = n.abs() as u64; + ca.downcast_iter() + .map(|c| { + // a negative n requires that we collect a different substring length + // for each item. + polars_arrow::export::arrow::array::Utf8Array::from_iter_values(c.iter().map( + |s| { + match s { + Some(s) => { + // saturating_sub prevents length < 0 + let s_len = (s.len() as u64).saturating_sub(abs_n); + Some(&s[0..s_len]) + } + None => s, + } + }, + )) + }) + .collect::>()? } else { let n = n as u64; - ca - .downcast_iter() + ca.downcast_iter() .map(|c| substring(c, 0, &Some(n as u64))) .collect::>()? }; diff --git a/py-polars/debug/launch.py b/py-polars/debug/launch.py new file mode 100644 index 000000000000..cd1ddba9ec3c --- /dev/null +++ b/py-polars/debug/launch.py @@ -0,0 +1,66 @@ +import os +import re +import sys +import time +from pathlib import Path + + +def launch_debugging() -> None: + """ + Debug Rust files via Python. + + Determine the pID for the current debugging session, attach the Rust LLDB launcher, + and execute the originally-requested script. + """ + if len(sys.argv) == 1: + raise RuntimeError( + "launch.py is not meant to be executed directly; please use the `Python: " + "Debug Rust` debugging configuration to run a python script that uses the " + "polars library." + ) + + # get the current process ID + pID = os.getpid() + + # print to the console to allow the "Rust LLDB" routine to pick up on the signal + launch_file = Path(__file__).parents[2] / ".vscode/launch.json" + if not launch_file.exists(): + raise RuntimeError(f"Cannot locate {launch_file}") + with launch_file.open("r") as f: + launch_info = f.read() + + # overwrite the pid found in launch.config with the pid for the current process + # match initial the "Rust LLDB" definition with the pid immediately after + pattern = re.compile('("Rust LLDB",\\s*"pid":\\s*")\\d+(")') + found = pattern.search(launch_info) + if not found: + raise RuntimeError( + "Cannot locate pid definition in launch.json for Rust LLDB configuration. " + "Please follow the instructions in CONTRIBUTING.md for creating the " + "launch configuration." + ) + + launch_info_with_new_pid = pattern.sub(rf"\g<1>{pID}\g<2>", launch_info) + with launch_file.open("w") as f: + f.write(launch_info_with_new_pid) + + # print pID to the debug console. This auto-triggers the Rust LLDB configurations. + print(f"pID = {pID}") + + # give the LLDB time to connect. We may have to play with this setting. + time.sleep(1) + + # run the originally requested file + # update sys.argv so that when exec() is called, it's populated with the requested + # script name in sys.argv[0], and the remaining args after + sys.argv.pop(0) + with Path(sys.argv[0]).open() as fh: + script_contents = fh.read() + + # path to the script to be executed + fh = Path(sys.argv[0]) + exec(compile(script_contents, fh, mode="exec"), {"__name__": "__main__"}) + + +if __name__ == "__main__": + launch_debugging()