diff --git a/Readme.md b/Readme.md
index 31075c1..3883ef2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,4 +1,6 @@
 # TrackCluster
+![PyPI](https://img.shields.io/pypi/v/trackcluster?color=green)
+
 Trackcluster is an isoform calling and quantification pipeline for Nanopore direct-RNA long reads.
 
 ## Table of Contents
@@ -41,6 +43,15 @@ pip install ./trackcluster
 # test if all dependencies are installed
 trackrun.py test --install
 
+# prepare the reference annotation bed file from gff file
+# tested on Ensembl, WormBase and Arapost gff
+gff2bigg.py -i ensemblxxxx.gff3 -o ref.bed 
+# WormBase full gff contains too many information, need to extract the lines from WormBase only
+cat c_elegans.PRJNA13758.WS266.annotations.gff3 |grep WormBase > ws266.gff
+gff2bigg.py -i ws266.gff -o ref.bed
+# the ref.bed can be sorted to speed up the analysis
+bedtools sort -i ref.bed > refs.bed # refs.bed contains the sorted, know transcripts from gff annotation
+
 # generate the read track from minimap2 bam file
 bam2bigg.py -b group1.bam -o group1.bed
 bam2bigg.py -b group2.bam -o group2.bed
@@ -50,12 +61,16 @@ cat group1.bed group2.bed > read.bed
 bedtools sort -i read.bed > reads.bed
 
 # Examples for running commands:
-trackrun.py clusterj -s reads.bed -r refs.bed -t 40 # run in junction mode, generate the isoform.bed
+trackrun.py clusterj -s reads.bed -r refs.bed -t 40 # run in junction mode, will generate the isoform.bed
 trackrun.py count -s reads.bed -r refs.bed -i isoform.bed # generate the csv file for isoform expression
+# alternative for cluster
+trackrun.py cluster -s reads.bed -r refs.bed -t 40 # run in exon/intron intersection mode， slower, will generate the isoform.bed
+
+# the post analysis could include the classification of novel isoforms
 trackrun.py desc --isoform isoform.bed --reference ref.bed > desc.bed  # generate the description for each novel isoform
+# this part can be run directly on reads, to count the frequency of splicing events in reads, like intron_retention
+trackrun.py desc --isoform reads.bed --reference ref.bed > reads_desc.bed 
 
-# alternative for cluster
-trackrun.py cluster -s reads.bed -r refs.bed -t 40 # run in exon/intron intersection mode， slower
 ```
 
 
diff --git a/script/gff2bigg.py b/script/gff2bigg.py
index 08489a9..9386118 100755
--- a/script/gff2bigg.py
+++ b/script/gff2bigg.py
@@ -7,7 +7,9 @@
 parentdir = os.path.dirname(os.path.dirname(currentdir))
 sys.path.insert(0,parentdir)
 
-from trackcluster import convert
+from trackcluster.convert import gff_to_bigGenePred
+from trackcluster.gff import GFF
+from trackcluster.tracklist import write_bigg
 
 parser=argparse.ArgumentParser()
 parser.add_argument("-i", "--gff",
@@ -15,20 +17,16 @@
 parser.add_argument("-o", "--out", default="bigg.bed",
                     help="the output bigGenePred file name")
 
-parser.add_argument("-k", "--key", default="Name",
-                    help="The key used as gene name in gff line, like Name=let-1")
+parser.add_argument("-k", "--key", default="ID",
+                    help="The key used as gene name in gff line attr column, default is ID, which is used in most Ensembl gff."
+                         "like ID=gene:ENSMUSG00000064842;Name=let-1")
 
 args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
 
 # make a file using the functions
 
-fw=open(args.out, "w")
+gff = GFF(args.gff)
+bigg_list = gff_to_bigGenePred(gff, indicator=args.key)
 
-gff = convert.GFF(args.gff, args.key)
-bigg_list = convert.gff_to_bigGenePred(gff)
+write_bigg(bigg_list, args.out)
 
-for bigg in bigg_list:
-    fw.write(bigg.to_str())
-    fw.write("\n")
-
-fw.close()
diff --git a/script/trackrun.py b/script/trackrun.py
index afbde8f..0a5608a 100644
--- a/script/trackrun.py
+++ b/script/trackrun.py
@@ -106,17 +106,17 @@ def cluster(self):
         #args = parser.parse_args(args=None if sys.argv[2:] else ['--help'])
 
         flow_cluster_all_gene_novel(wkdir=args.folder,
-                                     prefix=args.prefix,
-                                     nano_bed=args.sample,
-                                     gff_bed=args.reference,
-                                     core=args.thread,
-                                     f1=args.intersect1,
-                                     f2=args.intersect2,
-                                     count_cutoff=args.count,
-                                     batchsize=args.batchsize,
-                                    intronweight=args.intronweight,
-                                    cutoff1=args.cutoff1,
-                                    cutoff2=args.cutoff2)
+                                 prefix=args.prefix,
+                                 nano_bed=args.sample,
+                                 gff_bed=args.reference,
+                                 core=args.thread,
+                                 f1=args.intersect1,
+                                 f2=args.intersect2,
+                                 count_cutoff=args.count,
+                                 batchsize=args.batchsize,
+                                 intronweight=args.intronweight,
+                                 cutoff1=args.cutoff1,
+                                 cutoff2=args.cutoff2)
 
 
     def clusterj(self):
diff --git a/test/convert_test.py b/test/convert_test.py
index d10ea56..b872fc9 100644
--- a/test/convert_test.py
+++ b/test/convert_test.py
@@ -36,6 +36,29 @@ def test_cigar_count(self):
 
         print((cigar_count(cigar_tuple)))
 
+    def test_ensembl(self):
+        ensembl_file = "/data/reference/mouse/Mus_musculus.GRCm39.107.chr.gff3"
+        gff=GFF(ensembl_file)
+        bigglist=gff_to_bigGenePred(gff)
+        print(len(bigglist))
+
+    def test_tair(self):
+        # 52059 mRNA
+        gff_file = "/data/reference/tair/GCF_000001735.4_TAIR10.1_genomic.gff"
+        gff=GFF(gff_file)
+        bigglist=gff_to_bigGenePred(gff)
+        print(len(bigglist))
+
+    def test_wormbase(self):
+        gff_file = "/data/reference/cel/ws266.gff"
+        gff=GFF(gff_file)
+        gff.transcript_format("ID")
+        #print(gff.transcript_to_gene["T"])
+        bigglist=gff_to_bigGenePred(gff)
+        print(len(bigglist))
+
+
+
 
     def tearDown(self):
         self.bigg = None
\ No newline at end of file
diff --git a/test/gff_test.py b/test/gff_test.py
index 887977e..a7c8503 100644
--- a/test/gff_test.py
+++ b/test/gff_test.py
@@ -13,16 +13,33 @@ class GFFTest(unittest.TestCase):
     def setUp(self):
         #self.gff = GFF("/home/zhaolab1/reference/ce10_ws266.gff")
         self.gff=GFF("./genes/unc52/unc52.gff")
+        self.attr_str="ID=Transcript:ZC101.2e;Parent=Gene:WBGene00006787;Name=ZC101.2e;wormpep=WP:CE18424;locus=unc-52"
 
     def test_format_write(self):
         self.gff.gene_format()
         print((len(list(self.gff.gene_d.keys()))))
         print((list(self.gff.gene_d.keys())[0:10]))
-        self.gff.gff_write("./genes/unc52/unc52_write.gff", keys=["ZC101.2"])
+        self.gff.gff_write("./genes/unc52/unc52_write_2.gff")
         print("test gff IO done")
 
-    def test_parser(self):
-        print((parse_attributes("ID=Transcript:ZC101.2e;Parent=Gene:WBGene00006787;Name=ZC101.2e;wormpep=WP:CE18424;locus=unc-52")))
+    def test_parser_attr(self):
+        print(parse_attributes(self.attr_str))
+
+    def test_attr2str(self):
+        attr_dic=parse_attributes(self.attr_str)
+        attr_str_new=attr_dic_to_str(attr_dic)
+        print(attr_str_new)
+        print(self.attr_str)
+
+    def test_parser_gff_and_reverse(self):
+        self.gff.to_list()
+        gff_one=self.gff.gff_string_list[1]
+        gff_record=parse_gff_line(gff_one)
+        print(gff_record)
+        gff_str=gff_record_to_str(gff_record)
+        print(gff_one)
+        print(gff_str)
+
 
     def test_to_transcript(self):
         self.gff.transcript_format()
@@ -31,6 +48,54 @@ def test_to_transcript(self):
         print((self.gff.transcript_d))
         print("Finish to_transcript test")
 
+    def test_ensembl(self):
+        """
+        test the gff performace in ensembl gff format
+        :return:
+        """
+        ensembl_file="/data/reference/mouse/Mus_musculus.GRCm39.107.chr.gff3"
+        gff=GFF(ensembl_file)
+        gff.to_list()
+        gff.gene_format()
+        gff.transcript_format()
+        print(len(gff.gene_d), len(gff.transcript_d))
+
+        def __test_gene():
+            """
+            test code for ensembl mouse gff
+            :return:
+            """
+            keys=["gene:ENSMUSG00000114538","gene:ENSMUSG00000103506" ]
+            for k in keys:
+                print(k)
+                print(gene2line[k])
+                for i in gene2line[k]:
+                    print(self.gff_records[i])
+
+
+
+    def test_ucsc(self):
+        """
+        todo: this need a gtg parser
+        test the gff convert from ucsc gtf or gff
+        :return:
+        """
+        pass
+
+
+    def test_wormbase(self):
+        pass
+
+    def test_genbank(self):
+        """
+        test the gff format from genbank bioproject
+        :return:
+        """
+        pass
+
+
+
+
 
     def tearDown(self):
         self.bigg = None
diff --git a/test/track_test.py b/test/track_test.py
index 3c1889d..5f00a06 100644
--- a/test/track_test.py
+++ b/test/track_test.py
@@ -13,7 +13,7 @@
 class Test(unittest.TestCase):
     def setUp(self):
         genes=["unc52", "AT1G06860", "AT2G02100", "AT2G43410"]
-        gene=genes[-1]
+        gene=genes[0]
 
         bigg_nano=[]
         with open("./genes/{gene}/{gene}_nano.bed".format(gene=gene)) as f:
@@ -131,7 +131,7 @@ def __test_bindseq_reverse(self):
     def test_bindseq_forward(self):
         #ref_dict = fasta2dic("/home/li/reference/tair/tair10.fa")
         mrna_dict = fasta2dic("./genes/AT2G02100/AT2G02100.fasta")
-        pass
+
 
     def test_mrna_pos_to_chro(self):
         sample = self.bigg[0]
@@ -143,11 +143,12 @@ def test_mrna_pos_to_chro(self):
         ### check if the nucl of the same pos is indentical
         ### chro is 0 based and mRNA is 1 based
 
-    def __test_orfs(self):
+    def test_orfs(self):
         """
         can only run with ce10 ref
         """
-        ref_dict=fasta2dic("/home/zhaolab1/reference/ce10_ucsc.fa")
+        # ref_dic is a large external file
+        ref_dict=fasta2dic("/data/reference/ce10_ucsc.fa")
         bigg_one=self.bigg[30]
         bigg_one.bind_chroseq(ref_dict, gap=0, intron=False)
         print((bigg_one.seq_chro))
diff --git a/test/utils_test.py b/test/utils_test.py
index 1cea3a2..58973f0 100644
--- a/test/utils_test.py
+++ b/test/utils_test.py
@@ -36,6 +36,9 @@ def test_is_bin_in(self):
 
         myexe("/bin/bash -i -c minimap2")
 
+    def test_is_package_installed(self):
+        print(is_package_installed("Bio"))
+
     def test_summary(self):
         logger = log_summary()
         logger.info("This is just a test, print number 100 as float" + "," + str(float(1) * 100))
diff --git a/trackcluster/__init__.py b/trackcluster/__init__.py
index 0a75a58..d899c04 100644
--- a/trackcluster/__init__.py
+++ b/trackcluster/__init__.py
@@ -1,5 +1,5 @@
 # For relative imports to work in Python 3.6
 import os, sys
 sys.path.append(os.path.dirname(os.path.realpath(__file__)))
-__version__="0.1.1"
+__version__="0.1.2"
 __all__ = []
diff --git a/trackcluster/convert.py b/trackcluster/convert.py
index a8f0690..f55c21a 100644
--- a/trackcluster/convert.py
+++ b/trackcluster/convert.py
@@ -117,7 +117,7 @@ def cigar_count(cigar_tuple):
 
 #### another function
 
-def gff_to_bigGenePred(gff):
+def gff_to_bigGenePred(gff, indicator="ID"):
     """
     Note gff is 1 start and bigGenePred is 0
     :param gff: a GFF class with only one gene inside
@@ -127,11 +127,15 @@ def gff_to_bigGenePred(gff):
     bigg_list=[]
 
     if gff.transcript_d is None:
-        gff.transcript_format()
+        gff.transcript_format(indicator=indicator)
+    #print(list(gff.transcript_d.keys()))
 
     for key in list(gff.transcript_d.keys()):
         bigg = bigGenePred()
-        gene=gff.transcript_to_gene[key]
+        try:
+            gene=gff.transcript_to_gene[key]
+        except KeyError:
+            print("KeyERROR", key)
 
         for n, record in enumerate(gff.transcript_d[key]):
             #use the first line to get basic infor
diff --git a/trackcluster/flow.py b/trackcluster/flow.py
index 926a6ae..a06b035 100644
--- a/trackcluster/flow.py
+++ b/trackcluster/flow.py
@@ -431,4 +431,25 @@ def flow_cluster_all_gene_novel(wkdir, prefix,nano_bed, gff_bed, core=30,f1=0.01
 ########################################################################################################
 
 def flow_annotation(wkdir, ):
-    pass
\ No newline at end of file
+    pass
+
+
+############
+def flow_map_convert_clusterj_count(wkdir, prefix, ref_fasta, fastq_l, nano_bed, gff_bed, core=30,
+                                 f1=0.01, f2=0.01, count_cutoff=5, batchsize=2000):
+    """
+    The overall full run pipeline for impatient people
+    :param wkdir:
+    :param prefix:
+    :param ref_fasta:
+    :param fastq_l:
+    :param nano_bed:
+    :param gff_bed:
+    :param core:
+    :param f1:
+    :param f2:
+    :param count_cutoff:
+    :param batchsize:
+    :return:
+    """
+    pass
diff --git a/trackcluster/gff.py b/trackcluster/gff.py
index c641902..0a4556c 100644
--- a/trackcluster/gff.py
+++ b/trackcluster/gff.py
@@ -46,7 +46,19 @@ def parse_gff_line(line):
             'attributes': parse_attributes(line_l[8])
         }
 
-    return GFF_record(**data)
+        return GFF_record(**data)
+
+def gff_record_to_str(record):
+    """
+    reverse function for parse_gff_line
+    :param record:
+    :return:
+    """
+    attr_str=attr_dic_to_str(record.attributes)
+    line_l=list(record)
+    line_l[-1]=attr_str
+    line_str=["." if x is None else str(x) for x in line_l] # change the None back to .
+    return "\t".join(line_str)
 
 
 def parse_attributes(attr_string):
@@ -64,30 +76,54 @@ def parse_attributes(attr_string):
     else:
         attr_dic = OrderedDict()
         for attribute in attr_string.strip().split(";"):
-            if len(attribute) > 0:
-                elems = attribute.strip().split('=')
-                key = elems[0]
-                value = ' '.join(elems[1:])
-                if value[0] == '"':
-                    value = value[1:]
-                if value[-1] == '"':
-                    value = value[0:-1]
-                attr_dic[key] = value
+            if len(attribute) > 0 :
+                if "=" in attribute.strip():
+                    elems = attribute.strip().split('=')
+                    key = elems[0]
+                    value = ' '.join(elems[1:])
+                    if value[0] == '"':
+                        value = value[1:]
+                    if value[-1] == '"':
+                        value = value[0:-1]
+                    attr_dic[key] = value
     return attr_dic
 
+def attr_dic_to_str(attr_dic):
+    """
+    reverse function of parser_attr
+    :param attr_dic:
+    :return: string str for a valid gff file
+    """
+    if len(attr_dic)==0:
+        return ""
+    else:
+        line_l=[]
+        for k, v in attr_dic.items():
+            str_one=k+"="+v
+            line_l.append(str_one)
+        return ";".join(line_l)
+
+
 
 class GFF(object):
     """
     A general class to parse the gff file
     For speed issue, just read all lines into mem
+
+    parser the gff using
+
     """
 
-    def __init__(self, gfffile, indicator="sequence_name"):
+    def __init__(self, gfffile):
+
+        """
+        :param gfffile:
+        """
 
         # init in file reading
         self.filename=gfffile
         self.gff_string_list = []
-        self.gff_to_list() # here use list for the flexibility instead of generator
+        self.to_list() # here use list for the flexibility instead of generator
 
         self.gff_records=[] # hold multiple GFF_record in a list
 
@@ -96,9 +132,8 @@ def __init__(self, gfffile, indicator="sequence_name"):
         self.transcript_to_gene=None
         self.transcript_d=None
 
-        self.indicator=indicator
 
-    def gff_to_list(self):
+    def to_list(self):
         """
         :return: gfflines list
         """
@@ -106,73 +141,135 @@ def gff_to_list(self):
             lines = f.readlines()
             for line in lines:
                 if line[0] != "#" and len(line) > 18: # ignore the # and small lines
-                    self.gff_string_list.append(line)
+                    self.gff_string_list.append(line.strip())
+
+    def parser(self):
+        if len(self.gff_string_list)==0:
+            self.to_list()
+        else:
+            for line in self.gff_string_list:
+                self.gff_records.append(parse_gff_line(line))
 
-    def gene_format(self):
+    def gene_format(self, indicator="ID"):
         """
         format the gff line to blocks,
         {gene name, [mRNAline, cdsline...]}
 
+        :param indicator: usually be "ID", "Name", "sequence_name", the name used in gene line, which will be further used in exon line and/or cds line
+
         the gff need to be sorted and all gene/mRNA/exon/cds/UTR is together
-        """
-        indicator=self.indicator
 
-        gene_l=self.gff_string_list
 
+        select the gene lines, including the
+        gene, mRNA, exon,CDS, UTR
+        exclude the other lines, may need to add the other lines like pseudogene,
+        should try to include all keys containing an exon
+        switch to bottom-up method to avoid non-exon information
+        :return:
+        """
+        if len(self.gff_records)==0:
+            self.parser()
+
+        ####
+        parent2line_dic={} # dic for {namexxx:[1,2,3...]}. collect the level 1 ID and parent information
+        nontop_s=set() # set to store all lines numbers, which contains parent in attr
+        id2pos={} # {namexxx:2}
+        pos2id={}
+
+        # collect all three dic
+        for n, record in enumerate(self.gff_records):
+            try:
+                id = record.attributes[indicator]
+                id2pos[id] = n
+                pos2id[n]=id
+            except KeyError:
+                pass
+
+            try: # record all parent
+                p1=record.attributes["Parent"]
+                try:
+                    parent2line_dic[p1].append(n)
+                except KeyError:
+                    parent2line_dic[p1]=[n]
+                nontop_s.add(n)
+            except KeyError:
+                pass
+
+        #print(parent2line_dic)
+        #print(nontop_s)
+        #print(id2pos)
+
+        # add the top key to in id2line to the gene2line
+        # for ensembl gff, the top line with indicator="ID" will keep only genes
+        # usually the line is one mRNA line
+        gene2line={}
+        for id, pos_l in parent2line_dic.items():
+            try:
+                if id2pos[id] not in nontop_s or len(pos_l)==0: # some gff will have missing parent line, it is wrong but it happens
+                    gene2line[id]=pos_l
+            except KeyError:
+                pass
+        #print("gene2line,",len(gene2line), "parent2line", len(parent2line_dic))
+        #print_dic(gene2line)
+
+        # reverse the line2gene
+        line2gene={}
+        for gene, pos_l in gene2line.items():
+            for pos in pos_l:
+                try:
+                    record=self.gff_records[pos]
+                    name=record.attributes[indicator]
+                    line2gene[name]=gene
+                except KeyError:
+                    pass
+
+        #add the nontop ones to top keys
+        # use id2line to glue all untop line and their subline to gene2line
+        # the value in gene2line need to be expaned
+        for id, pos_l in parent2line_dic.items():
+            try:
+                if id2pos[id] in nontop_s: # not top key, has parent, need to add to gene2line
+                    try:
+                        gene=line2gene[id] #
+                        #print("in", gene)
+                        gene2line[gene].extend(pos_l)
+                    except KeyError:
+                        pass
+            except KeyError:
+                pass
+        # create the gene_d
         gene_d = OrderedDict()
-        for line in gene_l:
-            line_l = line.split("\t")
-
-            gff_type = line_l[2]
-            attributes = line_l[-1].strip()
-
-            if gff_type == "gene":
-                for attribute in attributes.split(";"):
-                    if indicator in attribute: # the key "sequence_name" only for wormbase gff
-                        kk = attribute.split("=")[1]
-                        gene_d[kk] = []
-                        gene_d[kk].append(line)
-
-            else:
-                for attribute in attributes.split(";"):
-                    if "ID" in attribute:
-                        parent = attribute.split("=")[1]
-                        if kk in parent:
-                            gene_d[kk].append(line)
-                            break
-                    if "Parent" in attribute:
-                        parent = attribute.split("=")[1]
-                        if kk in parent:
-                            gene_d[kk].append(line)
-                            break
+        for k, v in gene2line.items():
+            gene_d[k]=[]
+            line_gene=id2pos[k]
+            gene_d[k].append(self.gff_records[line_gene])
+
+            line_other=v
+            for i in line_other:
+                gene_d[k].append(self.gff_records[i])
         self.gene_d=gene_d
 
-    def transcript_format(self, keys=None):
+    def transcript_format(self, indicator="ID", keys=None):
         transcript_to_gene={} # {mrna_name: genename}
         transcript_d=OrderedDict() # {mrna_name: gff_line}
 
         if self.gene_d is None:
-            self.gene_format()
+            self.gene_format(indicator=indicator)
 
         if keys is None: # if no chose ones, use all mrna
             keys=list(self.gene_d.keys())
 
         for k in keys:
-            gff_list=self.gene_d[k]
+            record_list=self.gene_d[k] # use record instead of gff
 
             # first test the type of the gene by test if CDS is in the gff_line
-            type_set=set([x.split("\t")[2] for x in gff_list])
+            type_set=set([x.type for x in record_list])
             is_protein_coding=True if "CDS" in type_set else False
             exon_junction=set()
 
-            for line in gff_list:
-                record=parse_gff_line(line)
-
+            for record in record_list:
                 if record.type == "exon":
-                    try:
-                        transcript_name = record.attributes["Parent"].split(":")[1]  # only for wormbase
-                    except IndexError:
-                        transcript_name = record.attributes["Parent"]  # general
+                    transcript_name = record.attributes["Parent"]  # general
                     transcript_to_gene[transcript_name] = k
 
                     try:
@@ -184,23 +281,6 @@ def transcript_format(self, keys=None):
                     exon_junction.add(record.start)
                     exon_junction.add(record.end)
 
-            for line in gff_list:
-                # exon can be used to record genes from non-coding genes
-                if is_protein_coding: # need to add the not in exon cds to the key
-                    record=parse_gff_line(line)
-                    if record.type=="CDS" and (record.start not in exon_junction) and (record.end not in exon_junction): # cds can have multiple parents
-                        parent_l= record.attributes["Parent"].split(",")
-                        for parent in parent_l:
-                            try:
-                                transcript_name=parent.split(":")[1] # only for wormbase
-                            except IndexError:
-                                transcript_name=parent # only for wormbase
-                            #transcript_to_gene[transcript_name]=k
-                            try:
-                                transcript_d[transcript_name].append(record)
-                            except KeyError:
-                                transcript_d[transcript_name]=[]
-                                transcript_d[transcript_name].append(record)
 
         for k, v in list(transcript_d.items()):
             v.sort(key=attrgetter('start'))
@@ -209,9 +289,17 @@ def transcript_format(self, keys=None):
         self.transcript_d=transcript_d
 
     def gff_write(self, out, keys=None):
+        """
+        write the formatted or the original gff to a new file
+        :param out:
+        :param keys:
+        :return:
+        """
 
-        def write_line():
-            fw.write(line)
+        def write_line(gff_record):
+            gff_str = gff_record_to_str(gff_record)
+            fw.write(gff_str)
+            fw.write("\n")
 
         fw=open(out,"w")
 
@@ -220,22 +308,10 @@ def write_line():
 
         for k in keys:
             print(k)
-            v=self.gene_d[k]
-            for line in v:
-                if "gene" in line.split("\t")[2]:
-                    write_line()
-            for line in v:
-                if "mRNA" in line.split("\t")[2]:
-                    write_line()
-            for line in v:
-                if "exon" in line.split("\t")[2]:
-                    write_line()
-            for line in v:
-                if "CDS" in line.split("\t")[2]:
-                    write_line()
-            for line in v:
-                if "UTR" in line.split("\t")[2]:
-                    write_line()
+            record_l=self.gene_d[k]
+            for record in record_l:
+                write_line(record)
+
         fw.close()
 
 
diff --git a/trackcluster/track.py b/trackcluster/track.py
index dc8a429..f4622bd 100644
--- a/trackcluster/track.py
+++ b/trackcluster/track.py
@@ -404,7 +404,6 @@ def find_orfs_with_trans(self, trans_table=1, min_protein_length=10):
             return None
         else:
             from Bio.Seq import Seq
-            from Bio.Alphabet import generic_dna
             seq=Seq(self.seq_chro)
 
             seq0=seq.translate(table=1)
diff --git a/trackcluster/utils.py b/trackcluster/utils.py
index 6b6d3c2..e23dce5 100644
--- a/trackcluster/utils.py
+++ b/trackcluster/utils.py
@@ -252,6 +252,7 @@ def get_file_location(filepath):
 
 def list2file(ll, genename_file):
     """
+    IO function
     write a list to a file
     :param ll:
     :param genename_file:
@@ -265,6 +266,7 @@ def list2file(ll, genename_file):
 
 def file2list(genename_file):
     """
+    IO function
     reverse of file2list
     :param genename_file:
     :return:
@@ -273,4 +275,11 @@ def file2list(genename_file):
     with open(genename_file, "r") as f:
         for line in f.readlines():
             gene_l.append(line.strip())
-    return gene_l
\ No newline at end of file
+    return gene_l
+
+def print_dic(dic, n=100):
+    """
+    test code, print the first 100 line of dic
+    """
+    for k in list(dic.keys())[:n]:
+        print(k, dic[k])
\ No newline at end of file