-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Explicitly handle each node type #178
Changes from 20 commits
971e05a
963d0f0
f2ab7e6
b61e801
cecaddf
96058ff
c442eec
05d03ac
b0f8c80
736fd8a
31c6aa1
ee7a9f7
0782747
e73bea1
0df923c
95dea62
0d22ab8
781737d
cfb17d7
f242206
98b13d0
da4053b
7357c93
c45faae
4ec379b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,14 +75,14 @@ func New(in ssa.Node, config classifier) *Source { | |
marked: make(map[ssa.Node]bool), | ||
config: config, | ||
} | ||
s.dfs(in, map[*ssa.BasicBlock]int{}, nil) | ||
s.dfs(in, map[*ssa.BasicBlock]int{}, nil, false) | ||
return s | ||
} | ||
|
||
// dfs performs Depth-First-Search on the def-use graph of the input Source. | ||
// While traversing the graph we also look for potential sanitizers of this Source. | ||
// If the Source passes through a sanitizer, dfs does not continue through that Node. | ||
func (s *Source) dfs(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
func (s *Source) dfs(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock, isReferrer bool) { | ||
if s.marked[n] { | ||
return | ||
} | ||
|
@@ -91,88 +91,156 @@ func (s *Source) dfs(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBl | |
return | ||
} | ||
|
||
if instr, ok := n.(ssa.Instruction); ok { | ||
instrIndex, ok := indexInBlock(instr) | ||
if !ok { | ||
return | ||
} | ||
|
||
// If the referrer is in a different block from the one we last visited, | ||
// and it can't be reached from the block we are visiting, then stop visiting. | ||
if lastBlockVisited != nil && instr.Block() != lastBlockVisited && !s.canReach(lastBlockVisited, instr.Block()) { | ||
return | ||
} | ||
|
||
// If this call's index is lower than the highest seen so far in its block, | ||
// then this call is "in the past". If this call is a referrer, | ||
// then we would be propagating taint backwards in time, so stop traversing. | ||
// (If the call is an operand, then it is being used as a value, so it does | ||
// not matter when the call occurred.) | ||
if _, ok := instr.(*ssa.Call); ok && instrIndex < maxInstrReached[instr.Block()] && isReferrer { | ||
return | ||
} | ||
} | ||
|
||
mirCopy := map[*ssa.BasicBlock]int{} | ||
for m, i := range maxInstrReached { | ||
mirCopy[m] = i | ||
} | ||
|
||
if instr, ok := n.(ssa.Instruction); ok { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm doing a second
These should really be replaced by calls to dedicated functions. I'm happy to do it in this PR, but I could also do it in a future PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went ahead and extracted the first if (as well as other related instructions) into its own function. I think this reads much better. |
||
// If the referrer is in a different block from the one we last visited, | ||
// and it can't be reached from the block we are visiting, then stop visiting. | ||
if ib := instr.Block(); lastBlockVisited != nil && | ||
ib != lastBlockVisited && | ||
!s.canReach(lastBlockVisited, ib) { | ||
instrIndex, ok := indexInBlock(instr) | ||
if !ok { | ||
return | ||
} | ||
|
||
b := instr.Block() | ||
if i, ok := indexInBlock(instr); ok && mirCopy[b] < i { | ||
mirCopy[b] = i | ||
if mirCopy[instr.Block()] < instrIndex { | ||
mirCopy[instr.Block()] = instrIndex | ||
} | ||
lastBlockVisited = b | ||
|
||
lastBlockVisited = instr.Block() | ||
} | ||
|
||
s.preOrder = append(s.preOrder, n) | ||
s.marked[n] = true | ||
|
||
s.visitReferrers(n, mirCopy, lastBlockVisited) | ||
|
||
s.visitOperands(n, n.Operands(nil), mirCopy, lastBlockVisited) | ||
s.visit(n, mirCopy, lastBlockVisited) | ||
} | ||
|
||
func (s *Source) visitReferrers(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
referrers := s.referrersToVisit(n, maxInstrReached) | ||
func (s *Source) visit(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
if s.node == n { | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
return | ||
} | ||
|
||
for _, r := range referrers { | ||
switch v := r.(type) { | ||
case *ssa.Call: | ||
if callee := v.Call.StaticCallee(); callee != nil && s.config.IsSanitizer(utils.DecomposeFunction(callee)) { | ||
s.sanitizers = append(s.sanitizers, &sanitizer.Sanitizer{Call: v}) | ||
} | ||
switch t := n.(type) { | ||
case *ssa.Alloc: | ||
// An Alloc represents the allocation of space for a variable. If a Node is an Alloc, | ||
// and the thing being allocated is not an array, then either: | ||
// a) it is a Source value, in which case it will get its own traversal when sourcesFromBlocks | ||
// finds this Alloc | ||
// b) it is not a Source value, in which case we should not visit it. | ||
// However, if the Alloc is an array, then that means the source that we are visiting from | ||
// is being placed into an array, slice or varags, so we do need to keep visiting. | ||
if _, isArray := utils.Dereference(t.Type()).(*types.Array); isArray { | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
} | ||
|
||
case *ssa.Call: | ||
if callee := t.Call.StaticCallee(); callee != nil && s.config.IsSanitizer(utils.DecomposeFunction(callee)) { | ||
s.sanitizers = append(s.sanitizers, &sanitizer.Sanitizer{Call: t}) | ||
} | ||
|
||
s.dfs(r.(ssa.Node), maxInstrReached, lastBlockVisited) | ||
// This is to avoid attaching calls where the source is the receiver, ex: | ||
// core.Sinkf("Source id: %v", wrapper.Source.GetID()) | ||
if recv := t.Call.Signature().Recv(); recv != nil && s.config.IsSourceType(utils.DecomposeType(utils.Dereference(recv.Type()))) { | ||
return | ||
} | ||
Comment on lines
+174
to
+176
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this not also exclude, e.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. This is actually one of the more significant architectural flaws in the codebase right now, IMO. The reason is that As a side note, currently the propagation logic in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gotcha. And yeah, you know opinion on the pipeline of analyzers here. If these were unified in |
||
|
||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
s.visitOperands(n, maxInstrReached, lastBlockVisited) | ||
|
||
case *ssa.FieldAddr: | ||
deref := utils.Dereference(t.X.Type()) | ||
typPath, typName := utils.DecomposeType(deref) | ||
fieldName := utils.FieldName(t) | ||
if !s.config.IsSourceField(typPath, typName, fieldName) { | ||
return | ||
} | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
s.visitOperands(n, maxInstrReached, lastBlockVisited) | ||
|
||
// Only the Map itself can be tainted by an Update. | ||
// The Key can't be tainted. | ||
// The Value can propagate taint to the Map, but not receive it. | ||
case *ssa.MapUpdate: | ||
s.dfs(t.Map.(ssa.Node), maxInstrReached, lastBlockVisited, false) | ||
|
||
// The only Operand that can be tainted by a Send is the Chan. | ||
// The Value can propagate taint to the Chan, but not receive it. | ||
case *ssa.Send: | ||
s.dfs(t.Chan.(ssa.Node), maxInstrReached, lastBlockVisited, false) | ||
|
||
// These nodes' operands should not be visited, because they can only receive | ||
// taint from their operands, not propagate taint to them. | ||
case *ssa.BinOp, *ssa.ChangeInterface, *ssa.ChangeType, *ssa.Convert, *ssa.Extract, *ssa.MakeChan, *ssa.MakeMap, *ssa.MakeSlice, *ssa.Phi, *ssa.Range, *ssa.Slice, *ssa.UnOp: | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
|
||
// These nodes don't have operands; they are Values, not Instructions. | ||
case *ssa.Const, *ssa.Global, *ssa.Lookup, *ssa.Parameter: | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
|
||
// These nodes don't have referrers; they are Instructions, not Values. | ||
case *ssa.Go, *ssa.Store: | ||
s.visitOperands(n, maxInstrReached, lastBlockVisited) | ||
|
||
case *ssa.Index: | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
s.dfs(t.X.(ssa.Node), maxInstrReached, lastBlockVisited, false) | ||
|
||
case *ssa.IndexAddr: | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
s.dfs(t.X.(ssa.Node), maxInstrReached, lastBlockVisited, false) | ||
|
||
// These nodes are both Instructions and Values, and have no special restrictions. | ||
case *ssa.Field, *ssa.FreeVar, *ssa.MakeInterface, *ssa.Select, *ssa.TypeAssert: | ||
s.visitReferrers(n, maxInstrReached, lastBlockVisited) | ||
s.visitOperands(n, maxInstrReached, lastBlockVisited) | ||
|
||
// These nodes cannot propagate taint. | ||
case *ssa.Builtin, *ssa.DebugRef, *ssa.Defer, *ssa.Function, *ssa.If, *ssa.Jump, *ssa.MakeClosure, *ssa.Next, *ssa.Panic, *ssa.Return, *ssa.RunDefers: | ||
|
||
default: | ||
fmt.Printf("unexpected node received: %T %v; please report this issue\n", n, n) | ||
} | ||
} | ||
|
||
// referrersToVisit produces a filtered list of Referrers for an ssa.Node. | ||
// Specifically, we want to avoid referrers that shouldn't be visited, e.g. | ||
// because they would not be reachable in an actual execution of the program. | ||
func (s *Source) referrersToVisit(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int) (referrers []ssa.Instruction) { | ||
func (s *Source) visitReferrers(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
if n.Referrers() == nil { | ||
return | ||
} | ||
for _, r := range *n.Referrers() { | ||
if c, ok := r.(*ssa.Call); ok { | ||
// This is to avoid attaching calls where the source is the receiver, ex: | ||
// core.Sinkf("Source id: %v", wrapper.Source.GetID()) | ||
if recv := c.Call.Signature().Recv(); recv != nil && s.config.IsSourceType(utils.DecomposeType(utils.Dereference(recv.Type()))) { | ||
continue | ||
} | ||
|
||
// If this call's index is lower than the highest in its block, | ||
// then this call is "in the past" and we should stop traversing. | ||
i, ok := indexInBlock(r) | ||
if !ok { | ||
continue | ||
} | ||
if i < maxInstrReached[r.Block()] { | ||
continue | ||
} | ||
} | ||
|
||
if fa, ok := r.(*ssa.FieldAddr); ok { | ||
deref := utils.Dereference(fa.X.Type()) | ||
typPath, typName := utils.DecomposeType(deref) | ||
fieldName := utils.FieldName(fa) | ||
s.dfs(r.(ssa.Node), maxInstrReached, lastBlockVisited, true) | ||
} | ||
} | ||
|
||
if !s.config.IsSourceField(typPath, typName, fieldName) { | ||
continue | ||
} | ||
func (s *Source) visitOperands(n ssa.Node, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
for _, o := range n.Operands(nil) { | ||
if *o == nil { | ||
continue | ||
} | ||
|
||
referrers = append(referrers, r) | ||
s.dfs((*o).(ssa.Node), maxInstrReached, lastBlockVisited, false) | ||
} | ||
return referrers | ||
} | ||
|
||
func (s *Source) canReach(start *ssa.BasicBlock, dest *ssa.BasicBlock) bool { | ||
|
@@ -198,47 +266,6 @@ func (s *Source) canReach(start *ssa.BasicBlock, dest *ssa.BasicBlock) bool { | |
return false | ||
} | ||
|
||
func (s *Source) visitOperands(from ssa.Node, operands []*ssa.Value, maxInstrReached map[*ssa.BasicBlock]int, lastBlockVisited *ssa.BasicBlock) { | ||
// Do not visit Operands if the current node is an Extract. | ||
// This is to avoid incorrectly tainting non-Source values that are | ||
// produced by an Instruction that has a Source among the values it | ||
// produces, e.g. a call to a function with a signature like: | ||
// func NewSource() (*core.Source, error) | ||
// Which leads to a flow like: | ||
// Extract (*core.Source) --> Call (NewSource) --> error | ||
if _, ok := from.(*ssa.Extract); ok { | ||
return | ||
} | ||
|
||
for _, o := range operands { | ||
n, ok := (*o).(ssa.Node) | ||
if !ok { | ||
continue | ||
} | ||
|
||
// An Alloc represents the allocation of space for a variable. If a Node is an Alloc, | ||
// and the thing being allocated is not an array, then either: | ||
// a) it is a Source value, in which case it will get its own traversal when sourcesFromBlocks | ||
// finds this Alloc | ||
// b) it is not a Source value, in which case we should not visit it. | ||
// However, if the Alloc is an array, then that means the source that we are visiting from | ||
// is being placed into an array, slice or varags, so we do need to keep visiting. | ||
if al, isAlloc := (*o).(*ssa.Alloc); isAlloc { | ||
if _, isArray := utils.Dereference(al.Type()).(*types.Array); !isArray { | ||
continue | ||
} | ||
} | ||
|
||
// Don't traverse to the key in a lookup. | ||
// For example, if a map is tainted, looking up a value in the map | ||
// doesn't taint the key, so we shouldn't traverse to the key. | ||
if look, ok := from.(*ssa.Lookup); ok && *o == look.Index { | ||
continue | ||
} | ||
s.dfs(n, maxInstrReached, lastBlockVisited) | ||
} | ||
} | ||
|
||
// compress removes the elements from the graph that are not required by the | ||
// taint-propagation analysis. Concretely, only propagators, sanitizers and | ||
// sinks should constitute the output. Since, we already know what the source | ||
|
@@ -374,7 +401,7 @@ func sourcesFromBlocks(fn *ssa.Function, conf classifier) []*Source { | |
} | ||
|
||
// source obtained through a field or an index operation | ||
case *ssa.Field, *ssa.FieldAddr, *ssa.IndexAddr, *ssa.Lookup: | ||
case *ssa.Field, *ssa.FieldAddr, *ssa.Index, *ssa.IndexAddr, *ssa.Lookup: | ||
|
||
// source chan or map (arrays and slices have regular Allocs) | ||
case *ssa.MakeMap, *ssa.MakeChan: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function needed to be modified because the check for a call that is a referrer used to be done in
visitReferrers
. SinceCall
is the only special case, it feels cleaner to do it here. Other changes fell out of that change, because this function became unwieldy.