Skip to content

Commit

Permalink
[core] Use str_switch optimization in state::GetValue()
Browse files Browse the repository at this point in the history
This moved the needle on a few benchmarks!  Including the fib workload,
e.g.  benchmarks2/gc-cachegrind.

And you can see it in the raw-vm profile of fib.
  • Loading branch information
Andy C committed Mar 11, 2024
1 parent e56b698 commit 1a2934d
Showing 1 changed file with 151 additions and 145 deletions.
296 changes: 151 additions & 145 deletions core/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
from frontend import match
from mycpp import mops
from mycpp import mylib
from mycpp.mylib import log, print_stderr, tagswitch, iteritems, NewDict
from mycpp.mylib import (log, print_stderr, str_switch, tagswitch, iteritems,
NewDict)
from osh import split
from pylib import os_path
from pylib import path_stat
Expand Down Expand Up @@ -1835,152 +1836,157 @@ def GetValue(self, name, which_scopes=scope_e.Shopt):
# COMPUTED_VARS = {'PIPESTATUS': 1, 'FUNCNAME': 1, ...}
# if name not in COMPUTED_VARS: ...

if name == 'ARGV':
# TODO: ARGV can be a normal mutable variable in YSH
items = [value.Str(s)
for s in self.GetArgv()] # type: List[value_t]
return value.List(items)

# "Registers"
if name == '_status':
return num.ToBig(self.TryStatus())

if name == '_error':
return self.TryError()

if name == '_this_dir':
if len(self.this_dir) == 0:
# e.g. osh -c '' doesn't have it set
# Should we give a custom error here?
# If you're at the interactive shell, 'source mymodule.oil' will still
# work because 'source' sets it.
return value.Undef
with str_switch(name) as case:
if case('ARGV'):
# TODO: ARGV can be a normal mutable variable in YSH
items = [value.Str(s)
for s in self.GetArgv()] # type: List[value_t]
return value.List(items)

# "Registers"
elif case('_status'):
return num.ToBig(self.TryStatus())

elif case('_error'):
return self.TryError()

elif case('_this_dir'):
if len(self.this_dir) == 0:
# e.g. osh -c '' doesn't have it set
# Should we give a custom error here?
# If you're at the interactive shell, 'source mymodule.oil' will still
# work because 'source' sets it.
return value.Undef
else:
return value.Str(self.this_dir[-1]) # top of stack

elif case('PIPESTATUS'):
strs2 = [str(i)
for i in self.pipe_status[-1]] # type: List[str]
return value.BashArray(strs2)

elif case('_pipeline_status'):
items = [num.ToBig(i) for i in self.pipe_status[-1]]
return value.List(items)

elif case('_process_sub_status'): # Oil naming convention
items = [num.ToBig(i) for i in self.process_sub_status[-1]]
return value.List(items)

elif case('BASH_REMATCH'):
top_match = self.regex_match[-1]
with tagswitch(top_match) as case2:
if case2(regex_match_e.No):
groups = [] # type: List[str]
elif case2(regex_match_e.Yes):
m = cast(RegexMatch, top_match)
groups = util.RegexGroups(m.s, m.indices)
return value.BashArray(groups)

# Do lookup of system globals before looking at user variables. Note: we
# could optimize this at compile-time like $?. That would break
# ${!varref}, but it's already broken for $?.

elif case('FUNCNAME'):
# bash wants it in reverse order. This is a little inefficient but we're
# not depending on deque().
strs = [] # type: List[str]
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case2:
if case2(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)
strs.append(frame.func_name)

elif case2(debug_frame_e.Source):
# bash doesn't tell you the filename sourced
strs.append('source')

elif case2(debug_frame_e.Main):
strs.append('main') # also bash behavior

return value.BashArray(strs) # TODO: Reuse this object too?

# $BASH_SOURCE and $BASH_LINENO have OFF BY ONE design bugs:
#
# ${BASH_LINENO[$i]} is the line number in the source file
# (${BASH_SOURCE[$i+1]}) where ${FUNCNAME[$i]} was called (or
# ${BASH_LINENO[$i-1]} if referenced within another shell function).
#
# https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html

elif case('BASH_SOURCE'):
strs = []
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case2:
if case2(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)

# Weird bash behavior
assert frame.def_tok.line is not None
source_str = ui.GetLineSourceString(
frame.def_tok.line)
strs.append(source_str)

elif case2(debug_frame_e.Source):
frame = cast(debug_frame.Source, UP_frame)
# Is this right?
strs.append(frame.source_name)

elif case2(debug_frame_e.Main):
frame = cast(debug_frame.Main, UP_frame)
strs.append(frame.dollar0)

return value.BashArray(strs) # TODO: Reuse this object too?

elif case('BASH_LINENO'):
strs = []
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case2:
if case2(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)
strs.append(_LineNumber(frame.call_tok))

elif case2(debug_frame_e.Source):
frame = cast(debug_frame.Source, UP_frame)
strs.append(_LineNumber(frame.call_tok))

elif case2(debug_frame_e.Main):
# Bash does this to line up with 'main'
strs.append('0')

return value.BashArray(strs) # TODO: Reuse this object too?

elif case('LINENO'):
assert self.token_for_line is not None
# Reuse object with mutation
# TODO: maybe use interned GetLineNumStr?
self.line_num.s = str(self.token_for_line.line.line_num)
return self.line_num

elif case('BASHPID'): # TODO: YSH io->getpid()
return value.Str(str(posix.getpid()))

elif case('_'):
return value.Str(self.last_arg)

elif case('SECONDS'):
return value.Int(
mops.FromFloat(time_.time() - self.seconds_start))

else:
return value.Str(self.this_dir[-1]) # top of stack

if name == 'PIPESTATUS':
strs2 = [str(i) for i in self.pipe_status[-1]] # type: List[str]
return value.BashArray(strs2)

if name == '_pipeline_status':
items = [num.ToBig(i) for i in self.pipe_status[-1]]
return value.List(items)

if name == '_process_sub_status': # Oil naming convention
items = [num.ToBig(i) for i in self.process_sub_status[-1]]
return value.List(items)

if name == 'BASH_REMATCH':
top_match = self.regex_match[-1]
with tagswitch(top_match) as case:
if case(regex_match_e.No):
groups = [] # type: List[str]
elif case(regex_match_e.Yes):
m = cast(RegexMatch, top_match)
groups = util.RegexGroups(m.s, m.indices)
return value.BashArray(groups)

# Do lookup of system globals before looking at user variables. Note: we
# could optimize this at compile-time like $?. That would break
# ${!varref}, but it's already broken for $?.

if name == 'FUNCNAME':
# bash wants it in reverse order. This is a little inefficient but we're
# not depending on deque().
strs = [] # type: List[str]
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case:
if case(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)
strs.append(frame.func_name)

elif case(debug_frame_e.Source):
# bash doesn't tell you the filename sourced
strs.append('source')

elif case(debug_frame_e.Main):
strs.append('main') # also bash behavior

return value.BashArray(strs) # TODO: Reuse this object too?

# $BASH_SOURCE and $BASH_LINENO have OFF BY ONE design bugs:
#
# ${BASH_LINENO[$i]} is the line number in the source file
# (${BASH_SOURCE[$i+1]}) where ${FUNCNAME[$i]} was called (or
# ${BASH_LINENO[$i-1]} if referenced within another shell function).
#
# https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html

if name == 'BASH_SOURCE':
strs = []
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case:
if case(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)

# Weird bash behavior
assert frame.def_tok.line is not None
source_str = ui.GetLineSourceString(frame.def_tok.line)
strs.append(source_str)

elif case(debug_frame_e.Source):
frame = cast(debug_frame.Source, UP_frame)
# Is this right?
strs.append(frame.source_name)

elif case(debug_frame_e.Main):
frame = cast(debug_frame.Main, UP_frame)
strs.append(frame.dollar0)

return value.BashArray(strs) # TODO: Reuse this object too?

if name == 'BASH_LINENO':
strs = []
for frame in reversed(self.debug_stack):
UP_frame = frame
with tagswitch(frame) as case:
if case(debug_frame_e.Call):
frame = cast(debug_frame.Call, UP_frame)
strs.append(_LineNumber(frame.call_tok))

elif case(debug_frame_e.Source):
frame = cast(debug_frame.Source, UP_frame)
strs.append(_LineNumber(frame.call_tok))

elif case(debug_frame_e.Main):
# Bash does this to line up with 'main'
strs.append('0')

return value.BashArray(strs) # TODO: Reuse this object too?

if name == 'LINENO':
assert self.token_for_line is not None
# Reuse object with mutation
# TODO: maybe use interned GetLineNumStr?
self.line_num.s = str(self.token_for_line.line.line_num)
return self.line_num

if name == 'BASHPID': # TODO: YSH io->getpid()
return value.Str(str(posix.getpid()))

if name == '_':
return value.Str(self.last_arg)

if name == 'SECONDS':
return value.Int(mops.FromFloat(time_.time() - self.seconds_start))

# In the case 'declare -n ref='a[42]', the result won't be a cell. Idea to
# fix this:
# 1. Call self.unsafe_arith.ParseVarRef() -> BracedVarSub
# 2. Call self.unsafe_arith.GetNameref(bvs_part), and get a value_t
# We still need a ref_trail to detect cycles.
cell, _, _ = self._ResolveNameOrRef(name, which_scopes)
if cell:
return cell.val
# In the case 'declare -n ref='a[42]', the result won't be a cell. Idea to
# fix this:
# 1. Call self.unsafe_arith.ParseVarRef() -> BracedVarSub
# 2. Call self.unsafe_arith.GetNameref(bvs_part), and get a value_t
# We still need a ref_trail to detect cycles.
cell, _, _ = self._ResolveNameOrRef(name, which_scopes)
if cell:
return cell.val

return value.Undef
return value.Undef

def GetCell(self, name, which_scopes=scope_e.Shopt):
# type: (str, scope_t) -> Cell
Expand Down

0 comments on commit 1a2934d

Please sign in to comment.