fbaumdicker · emilpaulitz · Jun 5, 2024 · Jul 14, 2024 · Jul 14, 2024 · Jul 14, 2024
diff --git a/README.md b/README.md
@@ -50,6 +50,11 @@ cd goldfinder
 python -m pip install -r requirements.txt
 ```
 
+If you run into an error message like `error: command 'gcc' failed: No such file or directory`, gcc is probably not installed. You can check this by running `gcc --version`.
+
+On Windows, if pip throws the error `Cannot open include file: 'io.h': No such file or directory`, you might need to install the Microsoft C++ compiler. Get it here: https://visualstudio.microsoft.com/visual-cpp-build-tools/
+During installation, also tick the SDK option for desktop C++ development. For details, refer to: https://stackoverflow.com/questions/40018405/cannot-open-include-file-io-h-no-such-file-or-directory
+
 ##### Dependencies:
 `Bio`==1.6.0 \
 `DendroPy`==4.6.1 \
@@ -174,7 +179,7 @@ Miscellaneous:
 #### `association_clusters.txt`
 This file defines gene clusters as found by Markov clustering based on association scores. Each cluster starts with `>` followed by clulster ID and its size. In the following lines, all genes contained in the cluster are listed.
 
-#### `{score}_{association/dissociation}_significant_pairs.txt`
+#### `{score}_{association/dissociation}_significant_pairs.csv`
 This comma-separated file lists all gene pairs that are significantly associated/dissociated according to the chosen score. If appropriate, it also contains a `Cluster` column with the 1-based number of the cluster, or a `-` if the genes do not belong to the same cluster.
 
 #### `cytoscape_input.csv`

diff --git a/example_files/known_assocs.tsv b/example_files/known_assocs.tsv
@@ -0,0 +1,2 @@
+a	b
+r	q
diff --git a/example_files/metadata.csv b/example_files/metadata.csv
@@ -0,0 +1,21 @@
+Gene	Meta1	Meta2
+a	a	2
+b	d	4
+c	cx	3
+d	l	7
+e	x	5
+f	a	2
+g	l	1
+h	e	4
+I	sd	6
+j	d	7
+k	sd	8
+l		6
+m	d	4
+n		4
+o	hf	3
+p		2
+q		1
+r	d	2
+s	c	4
+t	c	5
diff --git a/example_files/roary_mini_example.csv b/example_files/roary_mini_example.csv
@@ -1,5 +1,5 @@
 Gene,Non-unique Gene name,Annotation,No. isolates,No. sequences,Avg sequences per isolate,Genome Fragment,Order within Fragment,Accessory Fragment,Accessory Order with Fragment,QC,Min group size nuc,Max group size nuc,Avg group size nuc,s1,s2,s3,s4,s5
-a,,,,,,,,,,,,,,x,,x,x,
+a,abc,hypothetical protein,,,,,,,,,,,,x,,x,x,
 b,,,,,,,,,,,,,,x,x,x,x,
 c,,,,,,,,,,,,,,x,,x,x,x
 d,,,,,,,,,,,,,,x,,,x,x

diff --git a/goldfinder/data_import.py b/goldfinder/data_import.py
@@ -33,8 +33,8 @@ def load_input(pinput, filetype, pmetadata, pknown_associations):
             exit("Error: input matrix has zero columns or rows. Maybe the matrix is not properly "
                  "formatted?")
         if not df.isin([0, 1]).all().all():
-            exit("Error: Values other than 0 and 1 enountered in input matrix.")
-
+            exit("Error: Values other than 0 and 1 enountered in input matrix. If you provide input"
+                 " in another format than a binary matrix please use the -f parameter.")
         df = df.astype(int)
 
     """

diff --git a/goldfinder/output.py b/goldfinder/output.py
@@ -21,7 +21,7 @@ def result_procedure(p_values_adj, p_values_unadj, significant_score_indices, cl
         cluster_size_viz(clusters, hist_file)
 
     print("Writing significant gene pairs to output")
-    gene_pair_file = f'{poutput}/{pscore}_{mode}_significant_pairs.txt'
+    gene_pair_file = f'{poutput}/{pscore}_{mode}_significant_pairs.csv'
     write_significant_gp(p_values_adj, p_values_unadj, significant_score_indices, cluster_dict,
                          locus_dict, gene_pair_file, pfile_type, perform_clustering, metadata,
                          known_assoc)
@@ -238,9 +238,9 @@ def assemble_gp_line(gene_1, gene_2, file_type, p_unadj, p_adj, locus_dict, perf
 
     if metadata is not None:
         s += ","
-        s += ",".join(metadata.loc[gene_1, :])
+        s += ",".join(metadata.loc[gene_1, :].astype(str))
         s += ","
-        s += ",".join(metadata.loc[gene_2, :])
+        s += ",".join(metadata.loc[gene_2, :].astype(str))
 
     if known_assoc_to_write is not None:
         s += ","