bin/o-generate-gephi-network-file

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2010 - 2012, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.

import os
import sys

import Oligotyping.utils.utils as utils


def main(environment_file, sample_mapping_file = None, unit_mapping_file = None, min_abundance = 0, min_sum_normalized_percent = 1,
         sample_size = 8, unit_size = 2, skip_unit_labels = False, skip_sample_labels = False):
    """Generate a GEXF network file from an environment file.

    The output path is derived from `environment_file`: the extension of its
    final path component (if any) is replaced with '.gexf', otherwise '.gexf'
    is appended.

    Parameters:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path; when given, it is loaded with
            utils.get_sample_mapping_dict and handed to the GEXF writer.
        unit_mapping_file: optional path; loaded with the same reader as the
            sample mapping file.
        min_abundance: forwarded to utils.get_oligos_sorted_by_abundance.
        min_sum_normalized_percent: accepted for interface compatibility.
            NOTE(review): this value is not used anywhere in this function;
            confirm whether it should be forwarded to the GEXF writer.
        sample_size, unit_size: forwarded to utils.generate_gexf_network_file.
        skip_unit_labels, skip_sample_labels: forwarded to
            utils.generate_gexf_network_file.

    Returns None.
    """
    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance = min_abundance)
    # Only the percents are used below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    # Mapping files are optional; a missing one is passed on as None.
    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # splitext() only considers the last path component, so dots in directory
    # names or in relative prefixes such as '../' do not truncate the path
    # (splitting the whole string on '.' turned '../data/env' into '..gexf').
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(oligos,
                                     samples_dict,
                                     unit_percents,
                                     output_file,
                                     sample_mapping_dict = sample_mapping,
                                     unit_mapping_dict = unit_mapping,
                                     sample_size = sample_size,
                                     unit_size = unit_size,
                                     skip_sample_labels = skip_sample_labels,
                                     skip_unit_labels = skip_unit_labels)


# Command-line entry point: parse the arguments and hand them to main().
if __name__ == '__main__':
    import argparse

    # Long help texts are written as adjacent string literals so that the
    # source indentation does not end up inside the help strings (which is
    # what a backslash continuation inside a literal does).
    parser = argparse.ArgumentParser(description='Generates a Gephi input file')
    parser.add_argument('environment_file', metavar = 'FILE',
                        help = 'Environment file that is generated by the pipeline')
    parser.add_argument('--sample-mapping', metavar = 'FILE', default = None,
                        help = 'Providing a sample mapping file will make the Gephi file much more '
                               'useful.')
    parser.add_argument('--unit-mapping', metavar = 'FILE', default = None,
                        help = 'Structurally, unit mapping is identical to sample mapping file, '
                               'instead, it describes properties of units.')
    parser.add_argument('--sample-size', metavar = 'INT', type = int, default = 8,
                        help = 'Sample node size. Default: %(default)d')
    parser.add_argument('--unit-size', metavar = 'INT', type = int, default = 2,
                        help = 'Unit node size. Default: %(default)d')
    parser.add_argument('--min-abundance', metavar = 'INT', type = int, default = 0,
                        help = 'Minimum abundance of a unit to be included in the network. '
                               'It is usually a good idea to give some cut-off since each unit '
                               '(whether it is an oligotype or an MED node) is going to be a '
                               'part of the network (total number of reads divided by 10,000 might '
                               'be a good start).')
    parser.add_argument('--min-sum-normalized-percent', metavar = 'INT', type = int, default = 1,
                        help = 'This defines the minimum sum normalized percent for an oligotype or MED '
                               'node in a sample to form an edge in the network. Sum normalization takes '
                               'an oligotype or MED node, generates a vector from its percent occurrence in all '
                               'samples, then normalizes the percent abundances so the total of the vector adds '
                               'up to 100%%. The default is %(default)s, but it might be a good idea to set it '
                               'to 0 for datasets with a lot of samples (such as more than 100 samples).')
    parser.add_argument('--skip-sample-labels', action = 'store_true', default = False,
                        help = 'Leave sample labels blank.')
    parser.add_argument('--skip-unit-labels', action = 'store_true', default = False,
                        help = 'Leave unit labels blank.')

    args = parser.parse_args()

    # main() returns None, so a run that does not raise exits with status 0.
    sys.exit(main(args.environment_file,
                  sample_mapping_file = args.sample_mapping,
                  unit_mapping_file = args.unit_mapping,
                  min_abundance = args.min_abundance,
                  min_sum_normalized_percent = args.min_sum_normalized_percent,
                  sample_size = args.sample_size,
                  unit_size = args.unit_size,
                  skip_unit_labels = args.skip_unit_labels,
                  skip_sample_labels = args.skip_sample_labels))