Skip to content

Commit

Permalink
reduce overhead for some bytevector->string calls
Browse files Browse the repository at this point in the history
Allocate minimum-sized buffers for transcoded port and codec when the
bytevector argument is less than file-buffer-size. Avoid extra
allocation and copy of result in that case. Take advantage of shared
buffer if user has determined this is safe.
  • Loading branch information
owaddell-beckman committed Aug 2, 2023
1 parent f11ad12 commit 821879d
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 25 deletions.
2 changes: 2 additions & 0 deletions LOG
Original file line number Diff line number Diff line change
Expand Up @@ -2395,3 +2395,5 @@
append-blocks procedures used by get-bytevector-all, get-bytevector-n,
$get-string-all, and get-string-n
mats/io.ms release_notes/release_notes.stex s/io.ss
- reduce allocation and copying for certain cases in bytevector->string
mats/io.ms release_notes/release_notes.stex s/io.ss
11 changes: 11 additions & 0 deletions mats/io.ms
Original file line number Diff line number Diff line change
Expand Up @@ -4019,6 +4019,7 @@

(mat encode/decode-consistency
(parameters
[file-buffer-size 1 (file-buffer-size)] ;; aim to hit one-block? #t and #f cases of $get-string-all
[transcoded-port-buffer-size 1 (transcoded-port-buffer-size)]
[make-codec-buffer (lambda (bp) (make-bytevector 4)) (make-codec-buffer)])
; verify that encoding/decoding is consistent (but not necessarily correct)
Expand Down Expand Up @@ -5255,6 +5256,15 @@
;; requested count is more than file-buffer-size
(check open-file-input-port (lambda (ip) (get-bytevector-n ip (fx+ (file-buffer-size) 1)))
make-bytevector bytevector-length (string->utf8 test-str))
;; bytevector->string case where input bv-len <= file-buffer-size
(let ([bv (string->utf8 test-str)] [tx (native-transcoder)])
(define (check-b->s buffer-size)
(parameterize ([file-buffer-size buffer-size])
(let-values ([(actual bytes) (bytes-for (lambda () (bytevector->string bv tx)))])
(assert (equal? actual test-str))
bytes)))
(< (check-b->s (bytevector-length bv))
(check-b->s (fx- (bytevector-length bv) 1))))
;; bytevector->string one-block vs. two-block case
(let* ([test-str "\x1F311;\x1F312;\x1F313;\x1F314;\x1F315;\x1F316;\x1F317;\x1F318;"]
[bv (string->utf8 test-str)]
Expand All @@ -5269,6 +5279,7 @@
(assert (equal? actual test-str))
bytes)))
;; ensure that we can distinguish one- and two-block case
;; without hitting the input bv-len <= file-buffer-size case
(assert (> bv-len one-block-buffer-len))
(< (check-b->s one-block-buffer-len)
(predicted-one-block (check-b->s two-block-buffer-len)
Expand Down
4 changes: 4 additions & 0 deletions release_notes/release_notes.stex
Original file line number Diff line number Diff line change
Expand Up @@ -2933,6 +2933,10 @@ type were defined in the importing library.

\subsection{Reduced allocation and copying}

When given a bytevector whose length is less than \scheme{file-buffer-size},
\scheme{bytevector->string} now allocates the minimum size string buffer,
internal ioffsets fxvector, and internal codec buffer.
The size of each was formerly hardcoded at 1024.
The
\scheme{bytevector->string},
\scheme{get-bytevector-all},
Expand Down
62 changes: 37 additions & 25 deletions s/io.ss
Original file line number Diff line number Diff line change
Expand Up @@ -3454,8 +3454,9 @@ implementation notes:
(eof-object)
(let ([index (binary-port-input-index p)]
[count (fxmin count port-count)])
(bytevector-copy! (binary-port-input-buffer p) index
bv start count)
(unless (and (fx= index start) (eq? bv (binary-port-input-buffer p)))
(bytevector-copy! (binary-port-input-buffer p) index
bv start count))
(set-binary-port-input-index! p (fx+ index count))
count))))]
[clear-input
Expand Down Expand Up @@ -3954,27 +3955,26 @@ implementation notes:
(loop (caar blocks) (cdar blocks) (cdr blocks) end)))))))

(define $get-string-all
(lambda (who textual-input-port)
(let ([buffer-size (file-buffer-size)])
(let ([get-some (port-handler-get-some ($port-handler textual-input-port))])
(let loop ([size 0]
[next-block-index 0]
[next-block (make-string buffer-size)]
[blocks '()])
(let ([next-size (get-some who textual-input-port
next-block next-block-index
(fx- buffer-size next-block-index))])
(if (eof-object? next-size)
(if (eq? size 0)
(eof-object)
(append-blocks size next-block-index next-block blocks))
(let ([size (fx+ size next-size)]
[next-block-index (fx+ next-block-index next-size)])
(if (fx>= next-block-index (fxquotient buffer-size 2))
(loop size 0
(make-string buffer-size)
(cons (cons next-block-index next-block) blocks))
(loop size next-block-index next-block blocks))))))))))
(lambda (who textual-input-port buffer-size one-block?)
(let ([get-some (port-handler-get-some ($port-handler textual-input-port))])
(let loop ([size 0]
[next-block-index 0]
[next-block (make-string buffer-size)]
[blocks '()])
(let ([next-size (get-some who textual-input-port
next-block next-block-index
(fx- buffer-size next-block-index))])
(if (eof-object? next-size)
(if (eq? size 0)
(eof-object)
(append-blocks size next-block-index next-block blocks))
(let ([size (fx+ size next-size)]
[next-block-index (fx+ next-block-index next-size)])
(if (and (not one-block?) (fx>= next-block-index (fxquotient buffer-size 2)))
(loop size 0
(make-string buffer-size)
(cons (cons next-block-index next-block) blocks))
(loop size next-block-index next-block blocks)))))))))

(set-who! get-string-n
(lambda (textual-input-port count)
Expand Down Expand Up @@ -4055,15 +4055,27 @@ implementation notes:
(lambda (textual-input-port)
(unless (and (input-port? textual-input-port) (textual-port? textual-input-port))
($oops who "~s is not a textual input port" textual-input-port))
($get-string-all who textual-input-port)))
($get-string-all who textual-input-port (file-buffer-size) #f)))

(set-who! bytevector->string
(lambda (bv tx)
(unless (bytevector? bv)
($oops who "~s is not a bytevector" bv))
(unless ($transcoder? tx)
($oops who "~s is not a transcoder" tx))
(let ([str ($get-string-all who (open-bytevector-input-port bv tx))])
(let* ([bv-len (bytevector-length bv)]
[default (file-buffer-size)]
;; We only call transcoded-port's get-some handler and always
;; with our own buffer, so minimize the port's unused string
;; buffer and associated ioffsets fxvector.
[bp (parameterize ([transcoded-port-buffer-size 1])
(open-bytevector-input-port bv tx))]
;; With current codecs, bv-len is an upper bound on the length of str.
;; We leave room for one exta char so that $get-string-all can loop to
;; realize it's at eof.
[str (if (and (fx<= bv-len default) (fx< bv-len (most-positive-fixnum)))
($get-string-all who bp (fx+ bv-len 1) #t)
($get-string-all who bp default #f))])
(if (eof-object? str) "" str))))
)

Expand Down

0 comments on commit 821879d

Please sign in to comment.