Coordinating asynchronous calls through multiple callbacks (v2)

In README.md there is a discussion of coordinating asynchronous calls using a data structure and (implicitly) multiple callback functions. This is a second version of that code that breaks things up a bit more in an effort to be a little clearer about what's doing what, and in particular why we have to construct a zillion functions along the way.

The initial setup is still the same:

var credentials = require('./credentials.json');

var mysql=require("mysql");

credentials.host="ids";

var connection = mysql.createConnection(credentials);
var data={};
var processed={}

sql = "SHOW DATABASES";
connection.query(sql,function(err,rows,fields){ //connection.connect() is run automatically for a query
  if (err) {
    console.log('Error looking up databases');
    connection.end();
  } else {
    processDBFs(rows); //Gets called once... so it is safe!
  }
});

Now we have a somewhat simplified version of processDBFs from the README:

function processDBFs(dbfs){ // Asynchronous row handler
  console.log("in processDBFs");
  for(var index in dbfs) {
    var dbf = dbfs[index].Database;
    var sql = 'SHOW TABLES IN '+ dbf;
    data[dbf] = Number.POSITIVE_INFINITY; //Exists, but not set.
    var callback = constructTablesCallback(dbf);
    connection.query(sql, callback);
  } // do NOT put a connection.end() here.  It will kill all queued queries.
}

The major change here is this uses a new function constructTablesCallback (provided below) to construct the callback used in the query. In the README version, the callback function is declared anonymously in the body of processDBFs, which rather clutters things up. Here we call a function (constructTablesCallback) to construct the desired callback function, which is then passed to the query.

One thing that this helps point out is that we aren't creating one callback function – we're creating many, one for every database returned by the SHOW DATABASES query. And that's arguably the key point here. If we have only one callback function, through something like:

  callback = (err, tables, fields) => {
    console.log('in inner func of processDBFs with dbf = ' + dbf);
    if (err) {
      console.log('Error finding tables in dbf ' + dbf);
      connection.end();
    } else {
      console.log('about to process tables for DB ' + dbf);
      console.log('with tables ' + JSON.stringify(tables));
      processTables(tables, dbf);
    }
  }

then that one callback function will have only one value of the variable dbf, which will be used by all the calls to that callback. Because of the evaluation mechanism used by JavaScript, the one value of dbf will in fact be the last database name we get from SHOW DATABASES. This will lead ot all manner of problems because we'll be trying to attach tables from other databases to that last database, which will typically fail.

And here's the code that constructs those callbacks:

function constructTablesCallback(dbf) {
  console.log('in dbf for ' + dbf);
  return function(err,tables, fields) {
    console.log('in inner func of processDBFs with dbf = ' + dbf);
    if (err) {
      console.log('Error finding tables in dbf ' + dbf);
      connection.end();
    } else {
      console.log('about to process tables for DB ' + dbf);
      processTables(tables, dbf);
    }
  }
}

This constructs and returns a new function, which "captures" the variable dbf in its scope. Since that has a different value for every call to constructTablesCallback, each of those callback functions will have their own separate value for dbf, avoiding the confusing tangle of crazy we'd get if we had a single callback.

We can then do something similar for processTables():

function processTables(tables, dbf) { // Asynchronous row handler
  data[dbf] = tables.length; // Now it is set.
  processed[dbf] = 0;    // And has not yet been used as a label.
  for(var index in tables){
    var tableObj = tables[index];
    for(key in tableObj) {
      var table = tableObj[key];
      table = dbf+"."+table;
      var sql = 'DESCRIBE '+table;
      var callback = processDescribeCallback(table, dbf);
      connection.query(sql, callback);
    }
  }
}

function processDescribeCallback(table, dbf) {
  return function(err, desc, fields) {
    if (err) {
      console.log('Error describing table '+ table);
    } else {
      processDescription(desc,table,dbf);
    }
  };
}

Add in the printing and bookkeeping and we have a complete working version, using datastructures and careful callback construction to handle the asynchrony:

function processDescription(desc, table, dbf) {
  data[dbf]--; //Processed one table
  if (processed[dbf]==0) {
    processed[dbf] = 1;
    console.log('---|'+dbf+'>');
  }
  console.log('.....|'+dbf+'.'+table+'>');
  desc.map(function(field){ // show the fields nicely
    console.log("\tFieldName: `"+field.Field+"` \t("+field.Type+")");
  });

  if(allZero(data)){connection.end()}
}

function allZero(object){
  allzero = true;
  for(obj in object){
    if(object[obj]!=0){allzero = false}
  }
  return(allzero);
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

MultipleCallbackCoordination.md

MultipleCallbackCoordination.md

Coordinating asynchronous calls through multiple callbacks (v2)

Files

MultipleCallbackCoordination.md

Latest commit

History

MultipleCallbackCoordination.md

File metadata and controls

Coordinating asynchronous calls through multiple callbacks (v2)