-
Notifications
You must be signed in to change notification settings - Fork 111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RSDK-9839: Have Go module clients be webrtc aware. #4751
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,10 @@ import ( | |
// a resource may register with a SharedConn which supports WebRTC. | ||
type OnTrackCB func(tr *webrtc.TrackRemote, r *webrtc.RTPReceiver) | ||
|
||
//nolint | ||
// The following describes the SharedConn lifetime for viam-server's modmanager communicating with | ||
// modules it has spawned. | ||
// | ||
// SharedConn wraps both a GRPC connection & (optionally) a peer connection & controls access to both. | ||
// For modules, the grpc connection is over a Unix socket. The WebRTC `PeerConnection` is made | ||
// separately. The `SharedConn` continues to implement the `rpc.ClientConn` interface by pairing up | ||
|
@@ -71,20 +75,70 @@ type SharedConn struct { | |
// `peerConnMu` synchronizes changes to the underlying `peerConn`, such that consecutive | ||
// calls to `GrpcConn` and `PeerConn` will return connections from the same (or newer, but not | ||
// prior) "generations". | ||
peerConnMu sync.Mutex | ||
peerConn *webrtc.PeerConnection | ||
peerConnReady <-chan struct{} | ||
peerConnClosed <-chan struct{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was unused |
||
peerConnMu sync.Mutex | ||
peerConn *webrtc.PeerConnection | ||
peerConnReady <-chan struct{} | ||
// peerConnFailed gets closed when a PeerConnection fails to connect. The peerConn pointer is | ||
// set to nil before this channel is closed. | ||
peerConnFailed chan struct{} | ||
|
||
onTrackCBByTrackNameMu sync.Mutex | ||
onTrackCBByTrackName map[string]OnTrackCB | ||
|
||
// isConnectedToViamServer identifies whether this SharedConn is running inside a viam-server | ||
// talking to a module, or a module talking to a viam-server. We use this to determine whether | ||
// to use long names or short names for PeerConnection video (or audio) track names. | ||
isConnectedToViamServer bool | ||
|
||
logger logging.Logger | ||
} | ||
|
||
// NewSharedConnForModule acts as a constructor for `SharedConn` for modules that are communicating | ||
// back to their parent viam-server. | ||
func NewSharedConnForModule(grpcConn rpc.ClientConn, peerConn *webrtc.PeerConnection, logger logging.Logger) *SharedConn { | ||
// We must be passed a ready connection. | ||
pcReady := make(chan struct{}) | ||
close(pcReady) | ||
|
||
ret := &SharedConn{ | ||
peerConn: peerConn, | ||
peerConnReady: pcReady, | ||
// We were passed in a ready connection. Only create this for when `Close` is called. | ||
peerConnFailed: make(chan struct{}), | ||
onTrackCBByTrackName: make(map[string]OnTrackCB), | ||
isConnectedToViamServer: true, | ||
logger: logger, | ||
} | ||
ret.grpcConn.ReplaceConn(grpcConn) | ||
|
||
ret.peerConn.OnTrack(func(trackRemote *webrtc.TrackRemote, rtpReceiver *webrtc.RTPReceiver) { | ||
ret.onTrackCBByTrackNameMu.Lock() | ||
onTrackCB, ok := ret.onTrackCBByTrackName[trackRemote.StreamID()] | ||
ret.onTrackCBByTrackNameMu.Unlock() | ||
if !ok { | ||
msg := "Callback not found for StreamID: %s, keys(resOnTrackCBs): %#v" | ||
ret.logger.Errorf(msg, trackRemote.StreamID(), maps.Keys(ret.onTrackCBByTrackName)) | ||
return | ||
} | ||
onTrackCB(trackRemote, rtpReceiver) | ||
}) | ||
|
||
return ret | ||
} | ||
|
||
// IsConnectedToModule returns whether this shared conn is being used to communicate with a module. | ||
func (sc *SharedConn) IsConnectedToModule() bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I created two methods for this because I didn't think it was obvious this was indeed binary? And I like to read these descriptions in the affirmative tense |
||
return !sc.isConnectedToViamServer | ||
} | ||
|
||
// IsConnectedToViamServer returns whether this shared conn is being used to communicate with a | ||
// viam-server. Note this implies the client is running within a module process. Typical | ||
// clients/remote connections are a pure webrtc connection. As opposed to a frankenstein tcp/unix | ||
// socket + webrtc connection. | ||
func (sc *SharedConn) IsConnectedToViamServer() bool { | ||
return sc.isConnectedToViamServer | ||
} | ||
|
||
// Invoke forwards to the underlying GRPC Connection. | ||
func (sc *SharedConn) Invoke( | ||
ctx context.Context, | ||
|
@@ -143,8 +197,8 @@ func (sc *SharedConn) PeerConn() *webrtc.PeerConnection { | |
return ret | ||
} | ||
|
||
// ResetConn acts as a constructor for `SharedConn`. ResetConn replaces the underlying | ||
// connection objects in addition to some other initialization. | ||
// ResetConn acts as a constructor for `SharedConn` inside the viam-server (not modules). ResetConn | ||
// replaces the underlying connection objects in addition to some other initialization. | ||
// | ||
// The first call to `ResetConn` is guaranteed to happen before any access to connection objects | ||
// happens. But subsequent calls can be entirely asynchronous to components/services accessing | ||
|
@@ -193,7 +247,16 @@ func (sc *SharedConn) ResetConn(conn rpc.ClientConn, moduleLogger logging.Logger | |
} | ||
|
||
sc.peerConn = peerConn | ||
sc.peerConnReady, sc.peerConnClosed, err = rpc.ConfigureForRenegotiation(peerConn, rpc.PeerRoleClient, sc.logger) | ||
// When communicating with modules, we may both call `AddStream` on the module (we are the | ||
// client) _and_ the module may call `AddStream` on us (for example, to stream video data from a | ||
// different module). Thus we must use the `PeerRoleServer` role such that we install an | ||
// `OnNegotiationNeeded` callback for when the stream server handle for `AddStream` attempts to | ||
// call `peerConn.AddTrack`. | ||
// | ||
// That said, it's not been rigorously exercised that when both ends initiate a renegotiation | ||
// for different video tracks, that we end up in a state where both video tracks are | ||
// successfully created. | ||
sc.peerConnReady, _, err = rpc.ConfigureForRenegotiation(peerConn, rpc.PeerRoleServer, sc.logger) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have to double check if we need PeerRoleServer or Client. And I ought to document the consequence of this choice. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've confirmed this is in fact needed. |
||
if err != nil { | ||
sc.logger.Warnw("Unable to create optional renegotiation channel for module. Ignoring.", "err", err) | ||
return | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ import ( | |
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" | ||
"github.com/jhump/protoreflect/desc" | ||
"github.com/jhump/protoreflect/grpcreflect" | ||
"github.com/viamrobotics/webrtc/v3" | ||
"go.uber.org/multierr" | ||
"go.uber.org/zap" | ||
"go.uber.org/zap/zapcore" | ||
|
@@ -112,6 +113,9 @@ type RobotClient struct { | |
// webrtc. We don't want a network disconnect to result in reconnecting over tcp such that | ||
// performance would be impacted. | ||
serverIsWebrtcEnabled bool | ||
|
||
pc *webrtc.PeerConnection | ||
sharedConn *grpc.SharedConn | ||
} | ||
|
||
// RemoteTypeName is the type name used for a remote. This is for internal use. | ||
|
@@ -291,6 +295,9 @@ func New(ctx context.Context, address string, clientLogger logging.ZapCompatible | |
rpc.WithStreamClientInterceptor(streamClientInterceptor()), | ||
) | ||
|
||
// If we're a client running as part of a module, we annotate our requests with our module | ||
// name. That way the receiver (e.g: viam-server) can execute logic based on where a request | ||
// came from. Such as knowing what WebRTC connection to add a video track to. | ||
if rOpts.modName != "" { | ||
inter := &grpc.ModInterceptors{ModName: rOpts.modName} | ||
rc.dialOptions = append(rc.dialOptions, rpc.WithUnaryClientInterceptor(inter.UnaryClientInterceptor)) | ||
|
@@ -680,10 +687,10 @@ func (rc *RobotClient) ResourceByName(name resource.Name) (resource.Resource, er | |
func (rc *RobotClient) createClient(name resource.Name) (resource.Resource, error) { | ||
apiInfo, ok := resource.LookupGenericAPIRegistration(name.API) | ||
if !ok || apiInfo.RPCClient == nil { | ||
return grpc.NewForeignResource(name, &rc.conn), nil | ||
return grpc.NewForeignResource(name, rc.getClientConn()), nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a comment at the top of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep -- need to unfold up a few lines: https://github.com/viamrobotics/rdk/pull/4751/files#diff-d6a5d93563cdb968513a4bc0246d93a5568b7233c639bfd2ba3e1d42c3925083R667 |
||
} | ||
logger := rc.Logger().Sublogger(resource.RemoveRemoteName(name).ShortName()) | ||
return apiInfo.RPCClient(rc.backgroundCtx, &rc.conn, rc.remoteName, name, logger) | ||
return apiInfo.RPCClient(rc.backgroundCtx, rc.getClientConn(), rc.remoteName, name, logger) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For @benjirewis 's edification, this is the thing we pass into the (camera) client constructor. Having this (newly added) It's certainly the intention that only module programs go through this code path (due to calling What's probably a better change (but a bit more movement/copying/uncertainty), that we can iterate towards, is having There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that vaguely makes sense to me.
I suppose we've been treating the module -> rdk client connection as a regular Golang SDK client, but I can see it's now diverging slightly. I don't have as much context as you, but I might push to continue to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Well, our other option is to use gRPC over WebRTC for the module connections in which case this would work without modifications! Once we moved to webrtc and used its video tracks, rdk client behavior started to diverge. This patch shoves that detail of how module connections are special into the robot Client. But by just calling (or not calling) SetPeerConnection. The only reason I prefer to not litter robot client with that detail is because of the whole "it's part of the public interface" for applications. I don't think I'd have an opinion if the two kinds of clients were distinct. |
||
} | ||
|
||
func (rc *RobotClient) resources(ctx context.Context) ([]resource.Name, []resource.RPCAPI, error) { | ||
|
@@ -1299,6 +1306,27 @@ func (rc *RobotClient) Tunnel(ctx context.Context, conn io.ReadWriteCloser, dest | |
return errors.Join(err, readerSenderErr, recvWriterErr) | ||
} | ||
|
||
// SetPeerConnection is only to be called internally from modules. | ||
func (rc *RobotClient) SetPeerConnection(pc *webrtc.PeerConnection) { | ||
rc.mu.Lock() | ||
rc.pc = pc | ||
rc.mu.Unlock() | ||
} | ||
|
||
func (rc *RobotClient) getClientConn() rpc.ClientConn { | ||
// Must be called with `rc.mu` in ReadLock+ mode. | ||
if rc.sharedConn != nil { | ||
return rc.sharedConn | ||
} | ||
|
||
if rc.pc == nil { | ||
return &rc.conn | ||
} | ||
|
||
rc.sharedConn = grpc.NewSharedConnForModule(&rc.conn, rc.pc, rc.logger.Sublogger("shared_conn")) | ||
return rc.sharedConn | ||
} | ||
|
||
func unaryClientInterceptor() googlegrpc.UnaryClientInterceptor { | ||
return func( | ||
ctx context.Context, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The logic change for knowing when to use a full resource name