diff --git a/src/call_consensus_pileup.cpp b/src/call_consensus_pileup.cpp index 192f1563..56222d3a 100644 --- a/src/call_consensus_pileup.cpp +++ b/src/call_consensus_pileup.cpp @@ -15,8 +15,9 @@ bool compare_allele_depth(const allele &a, const allele &b){ return b.depth < a.depth; } -ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold){ - //print_allele_depths(ad); +ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold, double insert_threshold){ + //insert threshold refers to insertion > 1bp in length + //min insert threshold refers to 1bp SNV ret_t t; t.nuc=gap; t.q = min_qual+33; @@ -32,7 +33,8 @@ ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double thre std::vector nuc_pos; allele tmp_a; char n; - uint32_t max_l = 0, max_depth = 0, tmp_depth = 0, cur_depth = 0, total_max_depth = 0, gap_depth = 0, total_indel_depth = 0; + + uint32_t max_l = 0, max_depth = 0, tmp_depth = 0, cur_depth = 0, total_max_depth = 0, gap_depth = 0, total_indel_depth = 0, insert_gap_depth=0; uint8_t ambg_n = 1, ctr = 0; double q = 0, tq = 0, cur_threshold = 0; std::vector::iterator it; @@ -124,12 +126,22 @@ ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double thre //print_allele_depths(ad); //std::cout << "\n"; //gap_depth = (total_indel_depth > cur_depth) ? total_indel_depth - cur_depth : 0; + //this describes the threshold for an SNV gap_depth = min_insert_threshold * total_max_depth; - //std::cout << max_depth << " " << gap_depth << " " << min_insert_threshold << " " << total_max_depth<<" \n"; - } + //this describes the threshold for an insertion > 1bp + insert_gap_depth = insert_threshold * total_max_depth; + } else gap_depth = 0; // For first position of allele - if(n!='*' && max_depth >= gap_depth){ // TODO: Check what to do when equal.{ + + //if we have multiple insertions in a row, we handle it differently + if(max_l > 2 && n!='*' && max_depth >= insert_gap_depth){ + t.nuc += n; + q += 0.5; // For rounding before converting to int + t.q += (((uint8_t)q)+33); + + } + else if(max_l <= 2 && n!='*' && max_depth >= gap_depth){ // TODO: Check what to do when equal. t.nuc += n; q += 0.5; // For rounding before converting to int t.q += (((uint8_t)q)+33); @@ -138,7 +150,9 @@ ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double thre return t; } -int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold){ +int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold, double insert_threshold){ + // min_insert is for SNV insertion + // insert_threshold is for longer than 1bp insertions std::string line, cell; std::ofstream fout((out_file+".fa").c_str()); std::ofstream tmp_qout((out_file+".qual.txt").c_str()); @@ -196,7 +210,7 @@ int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string ret_t t; if(mdepth >= min_depth){ ad = update_allele_depth(ref, bases, qualities, min_qual); - t = get_consensus_allele(ad, min_qual, threshold, gap, min_insert_threshold); + t = get_consensus_allele(ad, min_qual, threshold, gap, min_insert_threshold, insert_threshold); fout << t.nuc; tmp_qout << t.q; } else{ diff --git a/src/call_consensus_pileup.h b/src/call_consensus_pileup.h index cc7c5d75..ee2420a7 100644 --- a/src/call_consensus_pileup.h +++ b/src/call_consensus_pileup.h @@ -19,7 +19,7 @@ struct ret_t { }; void format_alleles(std::vector &ad); -int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold); -ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold); +int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold, double insert_threshold); +ret_t get_consensus_allele(std::vector ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold, double insert_threshold); #endif diff --git a/src/ivar.cpp b/src/ivar.cpp index 0a9a53e0..d0865c83 100755 --- a/src/ivar.cpp +++ b/src/ivar.cpp @@ -29,7 +29,8 @@ struct args_t { uint8_t min_qual; // -q uint8_t sliding_window; // -s double min_threshold; // -t - double min_insert_threshold; // -c + double insert_threshold; // -c + double snv_threshold; // -z int min_length; // -m std::string f1; // -1 std::string f2; // -2 @@ -120,6 +121,7 @@ void print_consensus_usage(){ " 0.9 | Strict or bases that make up atleast 90% of the depth at a position\n" " 1 | Identical or bases that make up 100% of the depth at a position. Will have highest ambiguities\n" " -c Minimum insertion frequency threshold(0 - 1) to call consensus. (Default: 0.8)\n" + " -z Minimum threshold(0-1) for SNV insertion to call consensus. (Deagult: 0.8)\n" " Frequently used thresholds | Description\n" " ---------------------------|------------\n" " 0 | Allow insertion if it appears even once\n" @@ -178,7 +180,7 @@ void print_version_info(){ static const char *trim_opt_str = "i:b:f:x:p:m:q:s:ekh?"; static const char *variants_opt_str = "p:t:q:m:r:g:h?"; -static const char *consensus_opt_str = "i:p:q:t:c:m:n:kh?"; +static const char *consensus_opt_str = "i:p:q:t:c:z:m:n:kh?"; static const char *removereads_opt_str = "i:p:t:b:h?"; static const char *filtervariants_opt_str = "p:t:f:h?"; static const char *getmasked_opt_str = "i:b:f:p:h?"; @@ -348,14 +350,18 @@ int main(int argc, char* argv[]){ g_args.gap = 'N'; g_args.min_qual = 20; g_args.keep_min_coverage = true; - g_args.min_insert_threshold = 0.8; + g_args.insert_threshold = 0.8; + g_args.snv_threshold = 0.8; while( opt != -1 ) { switch( opt ) { case 't': g_args.min_threshold = atof(optarg); break; case 'c': - g_args.min_insert_threshold = atof(optarg); + g_args.insert_threshold = atof(optarg); + break; + case 'z': + g_args.snv_threshold = atof(optarg); break; case 'i': g_args.seq_id = optarg; @@ -399,12 +405,13 @@ int main(int argc, char* argv[]){ std::cout <<"Minimum Quality: " << (uint16_t) g_args.min_qual << std::endl; std::cout << "Threshold: " << g_args.min_threshold << std::endl; std::cout << "Minimum depth: " << (unsigned) g_args.min_depth << std::endl; - std::cout << "Minimum Insert Threshold: " << g_args.min_insert_threshold << std::endl; + std::cout << "Minimum SNV Threshold: " << g_args.snv_threshold << std::endl; + std::cout << "Minimum Insert Threshold: " << g_args.insert_threshold << std::endl; if(!g_args.keep_min_coverage) std::cout << "Regions with depth less than minimum depth will not added to consensus" << std::endl; else std::cout << "Regions with depth less than minimum depth covered by: " << g_args.gap << std::endl; - res = call_consensus_from_plup(std::cin, g_args.seq_id, g_args.prefix, g_args.min_qual, g_args.min_threshold, g_args.min_depth, g_args.gap, g_args.keep_min_coverage, g_args.min_insert_threshold); + res = call_consensus_from_plup(std::cin, g_args.seq_id, g_args.prefix, g_args.min_qual, g_args.min_threshold, g_args.min_depth, g_args.gap, g_args.keep_min_coverage, g_args.snv_threshold, g_args.insert_threshold); } else if (cmd.compare("removereads") == 0){ opt = getopt( argc, argv, removereads_opt_str); while( opt != -1 ) { diff --git a/tests/test_call_consensus_from_plup.cpp b/tests/test_call_consensus_from_plup.cpp index ae2b68b0..f2066f1d 100755 --- a/tests/test_call_consensus_from_plup.cpp +++ b/tests/test_call_consensus_from_plup.cpp @@ -76,15 +76,15 @@ int main() { for(int i = 0;i del_ad(del_arr, del_arr+sizeof(del_arr)/sizeof(allele)); - s = get_consensus_allele(del_ad, 0, 0, 'N', 1); + s = get_consensus_allele(del_ad, 0, 0, 'N', 1, 1); success += (s.nuc.compare("") == 0) ? 1: 0; success += (s.q.compare("") == 0) ? 1 : 0; return (success == num_tests) ? 0 : -1; diff --git a/tests/test_consensus_min_depth.cpp b/tests/test_consensus_min_depth.cpp index 5f1e5164..67392675 100755 --- a/tests/test_consensus_min_depth.cpp +++ b/tests/test_consensus_min_depth.cpp @@ -7,7 +7,7 @@ int call_cns_check_outfile(std::string input_id, std::string prefix, std::string cns, char gap, bool call_min_depth, int min_depth){ std::string path = "../data/test.gap.sorted.mpileup"; std::ifstream mplp(path); - call_consensus_from_plup(mplp, input_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1); + call_consensus_from_plup(mplp, input_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1, 1); std::ifstream outFile(prefix+".fa"); std::string l; getline(outFile, l); // Ignore first line diff --git a/tests/test_consensus_min_insert_threshold.cpp b/tests/test_consensus_min_insert_threshold.cpp index 23d71174..36d6515e 100755 --- a/tests/test_consensus_min_insert_threshold.cpp +++ b/tests/test_consensus_min_insert_threshold.cpp @@ -3,7 +3,7 @@ #include "../src/allele_functions.h" int main() { - int num_tests = 8; + int num_tests = 10; allele a1 = { "T", 8, @@ -40,25 +40,71 @@ int main() { ad.at(i) = arr[i]; } - s = get_consensus_allele(ad,20,.8, 'N', 0); + s = get_consensus_allele(ad,20,.8, 'N', 0, 0); std::cout << s.nuc << ": " << s.q << std::endl; success += (s.nuc.compare("TN") == 0) ? 1: 0; success += (s.q.compare("??") == 0) ? 1 : 0; - s = get_consensus_allele(ad,20,.8, 'N', 1); + s = get_consensus_allele(ad,20,.8, 'N', 1, 1); std::cout << s.nuc << ": " << s.q << std::endl; success += (s.nuc.compare("T") == 0) ? 1: 0; success += (s.q.compare("?") == 0) ? 1 : 0; - s = get_consensus_allele(ad,20,.8, 'N',.6); + s = get_consensus_allele(ad,20,.8, 'N',.6, 0.6); std::cout << s.nuc << ": " << s.q << std::endl; success += (s.nuc.compare("TN") == 0) ? 1: 0; success += (s.q.compare("??") == 0) ? 1 : 0; - s = get_consensus_allele(ad,20,.8, 'N',.8); + s = get_consensus_allele(ad,20,.8, 'N',.8, 0.8); std::cout << s.nuc << ": " << s.q << std::endl; success += (s.nuc.compare("T") == 0) ? 1: 0; success += (s.q.compare("?") == 0) ? 1 : 0; + + //adding a test to check snv against insertion threshold + allele a3 = { + "T", + 5, + 0, + 30, + 0, + 0, + 0 + }; + allele a4 = { + "+GGGGGGG", + 3, + 0, + 30, + 0, + 0, + 0 + }; + allele a5 = { + "A", + 2, + 0, + 30, + 0, + 0, + 0 + }; + allele arr1[] = {a3, a4, a5}; + std::vector ad1(arr1, arr1+sizeof(arr1)/sizeof(allele)); + + for(int i = 0;i1bp + s = get_consensus_allele(ad1,20,0, 'N',0.8, 0); + std::cout << s.nuc << ": " << s.q << std::endl; + success += (s.nuc.compare("TGGGGGGG") == 0) ? 1: 0; + + s = get_consensus_allele(ad1,20, 0, 'N',0, 0.8); + std::cout << s.nuc << ": " << s.q << std::endl; + success += (s.nuc.compare("T") == 0) ? 1: 0; + return (success == num_tests) ? 0 : -1; } diff --git a/tests/test_consensus_seq_id.cpp b/tests/test_consensus_seq_id.cpp index 4b5e73ee..4790b251 100755 --- a/tests/test_consensus_seq_id.cpp +++ b/tests/test_consensus_seq_id.cpp @@ -8,7 +8,7 @@ int call_cns_check_outfile(std::string seq_id, std::string prefix, char gap, boo std::string path = "../data/test.gap.sorted.mpileup"; std::string expctd_hdr = ""; std::ifstream mplp(path); - call_consensus_from_plup(mplp, seq_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1); + call_consensus_from_plup(mplp, seq_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1, 1); std::ifstream outFile(prefix+".fa"); std::string l; getline(outFile, l); // header diff --git a/tests/test_consensus_threshold.cpp b/tests/test_consensus_threshold.cpp index 9dd1a53e..0ae6cbed 100755 --- a/tests/test_consensus_threshold.cpp +++ b/tests/test_consensus_threshold.cpp @@ -76,17 +76,17 @@ int main() { for(int i = 0;i