diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/.shed.yml b/tool_collections/fastx_toolkit/fasta_clipping_histogram/.shed.yml new file mode 100644 index 00000000000..8838742c67a --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_clipping_histogram/.shed.yml @@ -0,0 +1,13 @@ +categories: +- Fasta Manipulation +- Graphics +- Statistics +description: Length Distribution chart +long_description: | + This tool creates a histogram image of sequence lengths distribution + in a given fasta dataset file. +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fasta_clipping_histogram +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fasta_clipping_histogram +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/fasta_clipping_histogram.xml b/tool_collections/fastx_toolkit/fasta_clipping_histogram/fasta_clipping_histogram.xml new file mode 100644 index 00000000000..8c887da5070 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_clipping_histogram/fasta_clipping_histogram.xml @@ -0,0 +1,117 @@ + + chart + + macros.xml + + + perl-gdgraph + + fasta_clipping_histogram.pl $input $outfile + + + + + + + + + + + + + + + + + + + + sequence1 + AGTAGTAGGTGATGTAGAGAGAGAGAGAGTAG + >sequence2 + GTGTGTGTGGGAAGTTGACACAGTA + >sequence3 + CCTTGAGATTAACGCTAATCAAGTAAAC + +If the sequences span over multiple lines:: + + >sequence1 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG + TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG + aactggtctttacctTTAAGTTG + +Use the **FASTA Width Formatter** tool to re-format the FASTA into a single-lined sequences:: + + >sequence1 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + +----- + +**Multiplicity counts (a.k.a reads-count)** + +If the sequence identifier (the text after the '>') contains a dash and a number, it is treated 
as a multiplicity count value (i.e. how many times that individual sequence was repeated in the original FASTA file, before collapsing). + +Example 1 - The following FASTA file *does not* have multiplicity counts:: + + >seq1 + GGATCC + >seq2 + GGTCATGGGTTTAAA + >seq3 + GGGATATATCCCCACACACACACAC + +Each sequence counts as one, to produce the following chart: + +.. image:: ${static_path}/fastx_icons/fasta_clipping_histogram_3.png + +Example 2 - The following FASTA file has multiplicity counts:: + + >seq1-2 + GGATCC + >seq2-10 + GGTCATGGGTTTAAA + >seq3-3 + GGGATATATCCCCACACACACACAC + +The first sequence counts as 2, the second as 10, the third as 3, to produce the following chart: + +.. image:: ${static_path}/fastx_icons/fasta_clipping_histogram_4.png + +Use the **FASTA Collapser** tool to create FASTA files with multiplicity counts. + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + +.. __: http://hannonlab.cshl.edu/fastx_toolkit/ + ]]> + + + diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/macros.xml b/tool_collections/fastx_toolkit/fasta_clipping_histogram/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_clipping_histogram/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in1.fa b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in1.fa new file mode 100644 index 00000000000..dab0e10a3e3 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in1.fa @@ -0,0 +1,10 @@ +>Scaffold3648 
+AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACAACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATACATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACATTATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAAAAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATTCTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGGCACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGCCTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCTTCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGCATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTAAACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCCATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGCTACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTTATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGTTCGATATAGTACATGGACACAATTAAATATGATATTGTCT +>Scaffold9299 
+CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTTTCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaatttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaatacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaatttgtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatagtgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaaccttgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttcacaatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAACACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATTGTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAGAAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTGAAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAAAACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAAATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATATCGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATTTGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGATATTGTCACATATTGACTACGGTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAAATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTATCCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA +>Scaffold9309 
+GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATAAACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCATTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATGATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATTGTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCTATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATGATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTGGTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTTGGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCTGCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAtttttttgataatttttttttctttgataatttttttctttttctttaataaattttttggataatttttttttggataaatagttcttttttgataattctaataatttttttatttattttttttttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTAACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA +>Scaffold9310 +GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAAGACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACATTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtgtgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgtACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATAACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgtgagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCATGAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGGAAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGAGAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAAAAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGCAGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaaaaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttccctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtcaaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA +>Scaffold11911 
+TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTTTGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCTGTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttcttgtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAGTTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTATTTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctctttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTTGCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGTTGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATTATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAAGATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTTAACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGCTTTGATGCCAATCCAACAATTTTGCATATATTA diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in2.fa b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in2.fa new file mode 100644 index 00000000000..40fbc313e49 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-in2.fa @@ -0,0 +1,84 @@ +>Scaffold3648 +AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCA +GGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGG +TATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACA +ACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATA +CATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACAT +TATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAA +AAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATT +CTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGG +CACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGC +CTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCT +TCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGC +ATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTA +AACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCC 
+ATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGC +TACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTT +ATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGT +TCGATATAGTACATGGACACAATTAAATATGATATTGTCT +>Scaffold9299 +CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCA +TAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTT +TCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaat +ttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaa +tacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaattt +gtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatag +tgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaacct +tgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttca +caatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAA +CACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATT +GTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAG +AAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTG +AAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAA +AACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAA +ATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATAT +CGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATT +TGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGATATTGTCACATATTGACTACG +GTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAA +ATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTAT +CCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA +>Scaffold9309 +GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATA +AACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCA +TTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATG +ATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATT +GTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCT +ATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATG +ATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTG +GTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTT 
+GGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCT +GCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAttttttt +gataatttttttttctttgataatttttttctttttctttaataaattttttggataatt +tttttttggataaatagttcttttttgataattctaataatttttttatttatttttttt +ttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTA +ACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA +>Scaffold9310 +GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAA +GACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACA +TTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtg +tgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgt +ACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATA +ACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgt +gagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCAT +GAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGG +AAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGA +GAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAA +AAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGC +AGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaa +aaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttcc +ctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtc +aaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA +>Scaffold11911 +TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTT +TGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCT +GTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttctt +gtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAG +TTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTAT +TTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctct +ttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTT +GCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGT +TGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATT +ATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAA +GATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTT 
+AACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGC +TTTGATGCCAATCCAACAATTTTGCATATATTA diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out1.png b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out1.png new file mode 100644 index 00000000000..52c34a07f3e Binary files /dev/null and b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out1.png differ diff --git a/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out2.png b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out2.png new file mode 100644 index 00000000000..0d5c131824a Binary files /dev/null and b/tool_collections/fastx_toolkit/fasta_clipping_histogram/test-data/fasta_clipping_histogram-out2.png differ diff --git a/tool_collections/fastx_toolkit/fasta_formatter/.shed.yml b/tool_collections/fastx_toolkit/fasta_formatter/.shed.yml new file mode 100644 index 00000000000..6c1e37fccf3 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/.shed.yml @@ -0,0 +1,10 @@ +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +categories: +- Fasta Manipulation +description: FASTA Width formatter +long_description: | + This tool re-formats a FASTA file, changing the width of the nucleotides lines. 
+name: fasta_formatter +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fasta_formatter +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fasta_formatter/fasta_formatter.xml b/tool_collections/fastx_toolkit/fasta_formatter/fasta_formatter.xml new file mode 100644 index 00000000000..b754b91755b --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/fasta_formatter.xml @@ -0,0 +1,91 @@ + + formatter + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool re-formats a FASTA file, changing the width of the nucleotides lines. + +**TIP:** Outputting a single line (with **width = 0**) can be useful for scripting (with **grep**, **awk**, and **perl**). Every odd line is a sequence identifier, and every even line is a nucleotides line. + +-------- + +**Example** + +Input FASTA file (each nucleotides line is 50 characters long):: + + >Scaffold3648 + AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC + CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG + TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA + ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG + TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG + aactggtctttacctTTAAGTTG + + +Output FASTA file (with width=80):: + + >Scaffold3648 + AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTT + ATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCA + ATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTAC + GTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + +Output FASTA file (with width=0 => single line):: + + >Scaffold3648 + 
AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + diff --git a/tool_collections/fastx_toolkit/fasta_formatter/macros.xml b/tool_collections/fastx_toolkit/fasta_formatter/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.fasta b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.fasta new file mode 100644 index 00000000000..3c76807ab38 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.fasta @@ -0,0 +1,100 @@ +>Scaffold3648 +AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC +CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG +TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA +GTATAACTTTTCAAATACTTTTGTTTTACAACTTTTCTCTCTGGACTTAT +ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATACATGTACATC +TACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACAT +TATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGC +AAAAAAAGAAAAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTT +GATCTGAATGCTTTAACATTCTATATGTACAATAAATTTTTGTATCTATA +GCCTATTATTATATATGTTGCTATGTCAGGCACATTGACAACATTCTCAG +AAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGCCTTGTGAACT +AAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCT +TCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAA +AGTGGGGTGCATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAG +AGTCCCTAATCTGCAGTGTAAACAAACTTTGCCAGGACATCACCAGCCCA +ACCTTGATAAGTACTGCTTGGAACTCCTCCATGATGTTCTAGTCTTATTC 
+GCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGCTACCAGTATA +CATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTT +ATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATA +ATTGTCTAGTTCGATATAGTACATGGACACAATTAAATATGATATTGTCT +>Scaffold9299 +CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG +TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG +GAAAAGCATCCTTGTTTGTTTCACTATGCTTTTTAATGGTTGACGTTAAa +ggtaaagaccagtattggaaacgccccaatttcaaaaaatgaaatggaag +ctctcattaccaatcatgtgaaagaatatgttttgactaatacatgatga +taaaaaaattgccgggaaaccgcctactaattcatatatttagtaaattt +gtttctctcatggtctgtgagagatatagggtagtcccatatacatcttt +ctgtgtatagtgcttgtaactttacgaagaatgggccaaatttcttatca +ttttgatgattccagaaccttgcagatgcgagatggtagatgatcaacct +tttctgatcgattccataacgtttctttcacaatgcaatcgcatgaccat +aactggtctttacctTTAAGTTGTAGGTCTTAATTGATAACACTATATAG +TTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATT +GTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGC +CAAGGCAAAGAAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAA +AAAAATAGTTGATCACATTGAAAAAGATGACTTAATGTTGAATATCAAGA +AGTGTAAGATCATGTTATTTGGGACAAGAAAACGAATCAAAAATCAAAGT +GTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAAATGAATTTAA +ATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATAT +CGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATT +AAATATTATTTGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGAT +ATTGTCACATATTGACTACGGTATTGTTTTGTGGGGATGTTCAGCAAAGT +GTCATTTGGAAAAGTTACAAAAGTTACAAAATCGTTATGCCCGTTTAATA +CTAAACGTAGATATTTTGACACCTCGTATTATATTATTATCCTCTCTAAG +ATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA +>Scaffold9309 +GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTA +TGAAAACATAAACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAAT +TTCAATCATATAACATGTCATTCACTTCTCTTCTCTGACTGTCAAGTATT +AGGTATTCCTTTTTATTTCCTCTTAAAATGATCATAGTTTCCTATTTCTT +TTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATTGTAATAAAAC +GAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCT +ATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATC +GAATATAATGATTCGACCTAGTATGATATATTTTATGCACGTGATGCAAT +ACGAGTTGTTTTGATCTTTGGTCGACTTCAAGAAAACCAGCTTTAGAAAG +TTCGCTGTAGCCAACATGAGTTCTTGCCTTGGACTAGAACAGTTGATAAA 
+TCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCTGCAATCTAAA +TGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAttttttt +gataatttttttttctttgataatttttttctttttctttaataaatttt +ttggataatttttttttggataaatagttcttttttgataattctaataa +tttttttatttattttttttttttctataattttttttaaaaaatttatt +aatttttaattaaaaaaaaaataaGAGTTAACAGATTAAGGGAAACTGAC +AATTCAAAAAAAAAAAAAA +>Scaffold9310 +GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATAC +GCTTTCTTAAGACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACT +TTTTATTTTTAATTAAAACATTATCGAAATGAAGATACAGAGAAAAACGA +TGAGATGTAAGAAGTGCGCGTATTTAtgtgtgtgggtgcgtgtgtgtgtg +tgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgtACTAATTTTG +ATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATA +ACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgt +gcgtgcgtgtgagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATT +GCAATCATCTGTATCTTCATGAAAATGATAACCAGAAGAACAAAAAAAAA +AAACATTGAGAGAACATGTTTTTTTGATGGAAGACAAGAAGTTCTCGTAA +CGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGAGAGCAGTGAG +AGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAA +AAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAA +GTGAACGGGCAGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtact +atgtcccatttgcaggtcaaaaaaaatgaaaaagttaaattccaactgca +tttgaaagataatactaatttacaacttccctaaaaaaggtggggcttga +aaatgtcttcaagtgcggaaaataacgactattagttgtcaaatcgactt +tagggCTATAGAGCCCAAAAGTAATAGTCTTGA +>Scaffold11911 +TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAA +GTTTAAACTTTGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGT +ACATGTTTCTCTTAATATCTGTCGTATAGatttttaatatttttatttat +atCTACGTCAATCTGGCTGttctttttcttgtcttctttttttttctctc +tcttttttttcctcgtattttGTATTGATCCTTACCCTAGTTTTTGAACT +TGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTAT +TTGtctctgttcccctctccccccctctctctctctctctctctctctct +ctctctctctttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTG +TTTTCAGATTGATCTTTCTTGCTTTCCCCACCCTCCCCCTTTATGCAGTT +AATTTTCAGTCTATTTGTGTTTTCTGTGGTTGATTCTAATCATATTCTAA +CTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATTATATTTGTTA +CTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAA +GATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTT 
+CTCTCTCTTTAACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTG +GACTTCACTGGATATTTTGCTTTGATGCCAATCCAACAATTTTGCATATA +TTA diff --git a/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.out b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.out new file mode 100644 index 00000000000..dab0e10a3e3 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter1.out @@ -0,0 +1,10 @@ +>Scaffold3648 +AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACAACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATACATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACATTATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAAAAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATTCTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGGCACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGCCTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCTTCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGCATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTAAACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCCATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGCTACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTTATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGTTCGATATAGTACATGGACACAATTAAATATGATATTGTCT +>Scaffold9299 
+CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTTTCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaatttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaatacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaatttgtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatagtgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaaccttgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttcacaatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAACACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATTGTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAGAAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTGAAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAAAACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAAATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATATCGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATTTGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGATATTGTCACATATTGACTACGGTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAAATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTATCCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA +>Scaffold9309 
+GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATAAACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCATTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATGATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATTGTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCTATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATGATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTGGTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTTGGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCTGCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAtttttttgataatttttttttctttgataatttttttctttttctttaataaattttttggataatttttttttggataaatagttcttttttgataattctaataatttttttatttattttttttttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTAACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA +>Scaffold9310 +GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAAGACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACATTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtgtgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgtACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATAACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgtgagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCATGAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGGAAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGAGAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAAAAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGCAGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaaaaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttccctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtcaaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA +>Scaffold11911 
+TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTTTGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCTGTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttcttgtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAGTTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTATTTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctctttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTTGCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGTTGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATTATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAAGATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTTAACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGCTTTGATGCCAATCCAACAATTTTGCATATATTA diff --git a/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter2.out b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter2.out new file mode 100644 index 00000000000..40fbc313e49 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_formatter/test-data/fasta_formatter2.out @@ -0,0 +1,84 @@ +>Scaffold3648 +AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCA +GGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGG +TATTGTGCATTATATGACCTGTAATACACAGTATAACTTTTCAAATACTTTTGTTTTACA +ACTTTTCTCTCTGGACTTATATTAAAGTCAATTTTAATGAACATGTAGTAAAAACTAATA +CATGTACATCTACAGTTTATTTATTTTTTTCTTCTTCTTTTTGTATTTCTTGTGTTACAT +TATTTCACTTCACGTTCATGTTACCAACCTTGCCCCCTTGCTTTCCATGCAAAAAAAGAA +AAAAAAGAAGCAATACTTACACTTACCCTTGAGATATCTTGATCTGAATGCTTTAACATT +CTATATGTACAATAAATTTTTGTATCTATAGCCTATTATTATATATGTTGCTATGTCAGG +CACATTGACAACATTCTCAGAAGGTTAGAAGATGGTATTGTTCTGAAATGCCTGGAATGC +CTTGTGAACTAAGATGATTACTCATGTCATTAAAGTCCCCTAACCCAGGTATTTCCTCCT +TCCCATGACGAAAACAGTCCATTTAAACTTCACCCCACTTTGGACCCGAAAGTGGGGTGC +ATTTTGGTGGTAAGCTCACCACAGAGCAAGAGAGAGTTAGAGTCCCTAATCTGCAGTGTA +AACAAACTTTGCCAGGACATCACCAGCCCAACCTTGATAAGTACTGCTTGGAACTCCTCC +ATGATGTTCTAGTCTTATTCGCAGTCTCATATAGGTTCGGATTTTGTCCATTCTCATAGC 
+TACCAGTATACATGGGAGATGCCAGTTTCATCTTCCTTGCTTCACTTTATAAGCATAGTT +ATATCANGAACTTCCTGGTTATAATTATGTTCCTTTCAAGTTTCATCATAATTGTCTAGT +TCGATATAGTACATGGACACAATTAAATATGATATTGTCT +>Scaffold9299 +CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCA +TAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGGAAAAGCATCCTTGTTTGTT +TCACTATGCTTTTTAATGGTTGACGTTAAaggtaaagaccagtattggaaacgccccaat +ttcaaaaaatgaaatggaagctctcattaccaatcatgtgaaagaatatgttttgactaa +tacatgatgataaaaaaattgccgggaaaccgcctactaattcatatatttagtaaattt +gtttctctcatggtctgtgagagatatagggtagtcccatatacatctttctgtgtatag +tgcttgtaactttacgaagaatgggccaaatttcttatcattttgatgattccagaacct +tgcagatgcgagatggtagatgatcaaccttttctgatcgattccataacgtttctttca +caatgcaatcgcatgaccataactggtctttacctTTAAGTTGTAGGTCTTAATTGATAA +CACTATATAGTTTTTTTCTTTTTACTGTTTTTATTAATGACCTCTGTAATTTGCCCTATT +GTGAAAATACTAAAATATGTTTATACGCCGATGATGCGGCAATATTTTGCCAAGGCAAAG +AAATTGCCCTTGTTGAGAAAACTCTTAAATGTGAGTTTAAAAAAATAGTTGATCACATTG +AAAAAGATGACTTAATGTTGAATATCAAGAAGTGTAAGATCATGTTATTTGGGACAAGAA +AACGAATCAAAAATCAAAGTGTACGCTTGATTTACAGAGATAATGTTATCGAAGTTGTAA +ATGAATTTAAATATCTTGGTGTATTATTTGATAATTATTTAAAGTGGGATATACATATAT +CGAAAACTGCCTCCAAAATATCTAGAACCATATCATGTATAAAACGAATTAAATATTATT +TGCCGAAAAGAATTTTAAAATTGTTATATGATAGTTTGATATTGTCACATATTGACTACG +GTATTGTTTTGTGGGGATGTTCAGCAAAGTGTCATTTGGAAAAGTTACAAAAGTTACAAA +ATCGTTATGCCCGTTTAATACTAAACGTAGATATTTTGACACCTCGTATTATATTATTAT +CCTCTCTAAGATGGCAATCAGTTGTTCAGAGAGTGCAATACCAA +>Scaffold9309 +GAAGGAAGAAGAGGAAAATAATGATGAATTTGTAGAATTTCTATAACGTATGAAAACATA +AACAACATGAAAAAGTATGAACCGACAGAAGAATGAAAATTTCAATCATATAACATGTCA +TTCACTTCTCTTCTCTGACTGTCAAGTATTAGGTATTCCTTTTTATTTCCTCTTAAAATG +ATCATAGTTTCCTATTTCTTTTACACCATTGGGAAGGGAATTCCAATGTTTTATGGCATT +GTAATAAAACGAATTTCCAATACTACCTACTCTTTCTGGTAAGTTAAAGTTGAATCGGCT +ATTTCTTGTATTATAATCATGTACGTCAGTAACAAGATCGAAGTTGGATCGAATATAATG +ATTCGACCTAGTATGATATATTTTATGCACGTGATGCAATACGAGTTGTTTTGATCTTTG +GTCGACTTCAAGAAAACCAGCTTTAGAAAGTTCGCTGTAGCCAACATGAGTTCTTGCCTT +GGACTAGAACAGTTGATAAATCTCACCATTTTGTTCTTTAAGATGGGTAGAAGAATCCCT 
+GCAATCTAAATGGTCAATTACTGTGAAGTTATTTTTACTGGATGCACCCAATAttttttt +gataatttttttttctttgataatttttttctttttctttaataaattttttggataatt +tttttttggataaatagttcttttttgataattctaataatttttttatttatttttttt +ttttctataattttttttaaaaaatttattaatttttaattaaaaaaaaaataaGAGTTA +ACAGATTAAGGGAAACTGACAATTCAAAAAAAAAAAAAA +>Scaffold9310 +GCGGGGGCTGGGGAGGAAGGGGTGGCGTTATTTCACTTCCGATCTAATACGCTTTCTTAA +GACACTGAAATATCAGTAGGTATTGGTATAGAGAATTACTTTTTATTTTTAATTAAAACA +TTATCGAAATGAAGATACAGAGAAAAACGATGAGATGTAAGAAGTGCGCGTATTTAtgtg +tgtgggtgcgtgtgtgtgtgtgtgtgtgtgttgtgtgcgtgcgtgtgtgtggtggtgtgt +ACTAATTTTGATGTGTGTTGTGGCACAATTGCAATCATCAGTATCTTCATGAAAATGATA +ACCAGAAGCACAAAAAGGAGGgtgcgtgtgtgtgtgtgtgtgtttagtgtgcgtgcgtgt +gagggtgtttaagtgtgtatgtCGGAAATGTGGCACAATTGCAATCATCTGTATCTTCAT +GAAAATGATAACCAGAAGAACAAAAAAAAAAAACATTGAGAGAACATGTTTTTTTGATGG +AAGACAAGAAGTTCTCGTAACGTAGGATCTCCGAGACATGATGGGGTCAACTTAAAAAGA +GAGCAGTGAGAGGCATTTATATCGAAGGTCAGGGAAAGGCAAACAAAGAAAGAAAAAAAA +AAGGCTCACAGGAGAACGAAAACACGGGCCAAAATAATAAACAGGAGCAAGTGAACGGGC +AGTTTGGTAGCTACTTCATTTACCGGCTTTTAAaggtactatgtcccatttgcaggtcaa +aaaaaatgaaaaagttaaattccaactgcatttgaaagataatactaatttacaacttcc +ctaaaaaaggtggggcttgaaaatgtcttcaagtgcggaaaataacgactattagttgtc +aaatcgactttagggCTATAGAGCCCAAAAGTAATAGTCTTGA +>Scaffold11911 +TTCTTGGCACCCCCCCCCCCCCCACACTCCTGCACTGAAGAACTACTCAAGTTTAAACTT +TGCATTGCTTTTCTTTCTTTTTCAGTATTTTTTGCTTGGTACATGTTTCTCTTAATATCT +GTCGTATAGatttttaatatttttatttatatCTACGTCAATCTGGCTGttctttttctt +gtcttctttttttttctctctcttttttttcctcgtattttGTATTGATCCTTACCCTAG +TTTTTGAACTTGAACAGCAATTTGCAGCACTCAAATTTCTTTAAAATTACCTTCTCTTAT +TTGtctctgttcccctctccccccctctctctctctctctctctctctctctctctctct +ttcATCTCCCATATCATAATTTGAAGTACCATCTATGGTGTTTTCAGATTGATCTTTCTT +GCTTTCCCCACCCTCCCCCTTTATGCAGTTAATTTTCAGTCTATTTGTGTTTTCTGTGGT +TGATTCTAATCATATTCTAACTCTTATTTTACATTTTACTTCACTAACAACTGGTTTATT +ATATTTGTTACTAATTTTGAATTAAACTATTTACCATTCTGAACGAACTGAAAGATTAAA +GATCAAACTATCTATGAATAGAATGGTATTTCTTCAATTTATTCAAATTTCTCTCTCTTT +AACCCCCTTTTTCTGCTTGCATTTTTATCCCTTTGCCGTGGACTTCACTGGATATTTTGC 
+TTTGATGCCAATCCAACAATTTTGCATATATTA diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/.shed.yml b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/.shed.yml new file mode 100644 index 00000000000..00e61fb218e --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/.shed.yml @@ -0,0 +1,10 @@ +categories: +- Fasta Manipulation +description: RNA/DNA converter. +long_description: | + This tool converts RNA FASTA files to DNA (and vice-versa). +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fasta_nucleotide_changer +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fasta_nucleotide_changer +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/fasta_nucleotide_changer.xml b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/fasta_nucleotide_changer.xml new file mode 100644 index 00000000000..b00256b40bb --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/fasta_nucleotide_changer.xml @@ -0,0 +1,74 @@ + + converter + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool converts RNA FASTA files to DNA (and vice-versa). + +In **RNA-to-DNA** mode, U's are changed into T's. + +In **DNA-to-RNA** mode, T's are changed into U's. 
+ +-------- + +**Example** + +Input RNA FASTA file ( from Sanger's mirBase ):: + + >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 + UGAGGUAGUAGGUUGUAUAGUU + >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 + UCCCUGAGACCUCAAGUGUGA + >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 + UGGAAUGUAAAGAAGUAUGUA + +Output DNA FASTA file (with RNA-to-DNA mode):: + + >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 + TGAGGTAGTAGGTTGTATAGTT + >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 + TCCCTGAGACCTCAAGTGTGA + >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 + TGGAATGTAAAGAAGTATGTA + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/macros.xml b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change1.out b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change1.out new file mode 100644 index 00000000000..e46135f36f4 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change1.out @@ -0,0 +1,50 @@ +>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 +UGAGGUAGUAGGUUGUAUAGUU +>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 +UCCCUGAGACCUCAAGUGUGA +>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 +UGGAAUGUAAAGAAGUAUGUA +>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 +UAUCACAGCCAGCUUUGAUGUGC +>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 +AGGCAGUGUGGUUAGCUGGUUG +>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 +UCACCGGGUGGAAACUAGCAGU +>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 
+UCACCGGGUGAAAAUUCGCAUG +>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 +UCACCGGGUGAACACUUGCAGU +>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 +UCACCGGGAGAAAAACUGGAGU +>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 +UCACCGGGUGUAAAUCAGCUUG +>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 +UCACCGGGUGUACAUCAGCUAA +>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 +UCACCGGGUGAAAAAUCACCUA +>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 +UCACCGGGUUAACAUCUACAGA +>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 +UAUCACAGUUUACUUGCUGUCGC +>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 +UGACUAGAGACACAUUCAGCU +>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 +UGACUAGAGACACAUUCAGCU +>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 +UGUCAUGGAGUCGCUCUCUUCA +>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 +UGUCAUGGAGGCGCUCUCUUCA +>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 +UGAGGUAGGCUCAGUAGAUGCGA +>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 +AAGCACCACGAGAAGCUGCAGA +>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 +UGAUAUGUCUGGUAUUCUUGGG +>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 +UACCCGUAGCUCCUAUCCAUGUU +>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 +CACCCGUACAUAUGUUUCCGUGCU +>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 +CACCCGUACAUUUGUUUCCGUGCU +>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 +UACCCGUAAUCUUCAUAAUCCGAG diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change2.out b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change2.out new file mode 100644 index 00000000000..c42d9dc113f --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_change2.out @@ -0,0 +1,50 @@ +>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 +TGAGGTAGTAGGTTGTATAGTT +>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 +TCCCTGAGACCTCAAGTGTGA 
+>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 +TGGAATGTAAAGAAGTATGTA +>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 +TATCACAGCCAGCTTTGATGTGC +>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 +AGGCAGTGTGGTTAGCTGGTTG +>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 +TCACCGGGTGGAAACTAGCAGT +>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 +TCACCGGGTGAAAATTCGCATG +>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 +TCACCGGGTGAACACTTGCAGT +>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 +TCACCGGGAGAAAAACTGGAGT +>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 +TCACCGGGTGTAAATCAGCTTG +>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 +TCACCGGGTGTACATCAGCTAA +>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 +TCACCGGGTGAAAAATCACCTA +>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 +TCACCGGGTTAACATCTACAGA +>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 +TATCACAGTTTACTTGCTGTCGC +>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 +TGACTAGAGACACATTCAGCT +>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 +TGACTAGAGACACATTCAGCT +>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 +TGTCATGGAGTCGCTCTCTTCA +>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 +TGTCATGGAGGCGCTCTCTTCA +>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 +TGAGGTAGGCTCAGTAGATGCGA +>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 +AAGCACCACGAGAAGCTGCAGA +>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 +TGATATGTCTGGTATTCTTGGG +>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 +TACCCGTAGCTCCTATCCATGTT +>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 +CACCCGTACATATGTTTCCGTGCT +>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 +CACCCGTACATTTGTTTCCGTGCT +>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 +TACCCGTAATCTTCATAATCCGAG diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer1.fasta 
b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer1.fasta new file mode 100644 index 00000000000..c42d9dc113f --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer1.fasta @@ -0,0 +1,50 @@ +>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 +TGAGGTAGTAGGTTGTATAGTT +>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 +TCCCTGAGACCTCAAGTGTGA +>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 +TGGAATGTAAAGAAGTATGTA +>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 +TATCACAGCCAGCTTTGATGTGC +>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 +AGGCAGTGTGGTTAGCTGGTTG +>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 +TCACCGGGTGGAAACTAGCAGT +>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 +TCACCGGGTGAAAATTCGCATG +>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 +TCACCGGGTGAACACTTGCAGT +>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 +TCACCGGGAGAAAAACTGGAGT +>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 +TCACCGGGTGTAAATCAGCTTG +>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 +TCACCGGGTGTACATCAGCTAA +>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 +TCACCGGGTGAAAAATCACCTA +>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 +TCACCGGGTTAACATCTACAGA +>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 +TATCACAGTTTACTTGCTGTCGC +>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 +TGACTAGAGACACATTCAGCT +>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 +TGACTAGAGACACATTCAGCT +>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 +TGTCATGGAGTCGCTCTCTTCA +>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 +TGTCATGGAGGCGCTCTCTTCA +>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 +TGAGGTAGGCTCAGTAGATGCGA +>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 +AAGCACCACGAGAAGCTGCAGA +>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 +TGATATGTCTGGTATTCTTGGG +>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 
+TACCCGTAGCTCCTATCCATGTT +>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 +CACCCGTACATATGTTTCCGTGCT +>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 +CACCCGTACATTTGTTTCCGTGCT +>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 +TACCCGTAATCTTCATAATCCGAG diff --git a/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer2.fasta b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer2.fasta new file mode 100644 index 00000000000..e46135f36f4 --- /dev/null +++ b/tool_collections/fastx_toolkit/fasta_nucleotide_changer/test-data/fasta_nuc_changer2.fasta @@ -0,0 +1,50 @@ +>cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 +UGAGGUAGUAGGUUGUAUAGUU +>cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 +UCCCUGAGACCUCAAGUGUGA +>cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 +UGGAAUGUAAAGAAGUAUGUA +>cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 +UAUCACAGCCAGCUUUGAUGUGC +>cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 +AGGCAGUGUGGUUAGCUGGUUG +>cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 +UCACCGGGUGGAAACUAGCAGU +>cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 +UCACCGGGUGAAAAUUCGCAUG +>cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 +UCACCGGGUGAACACUUGCAGU +>cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 +UCACCGGGAGAAAAACUGGAGU +>cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 +UCACCGGGUGUAAAUCAGCUUG +>cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 +UCACCGGGUGUACAUCAGCUAA +>cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 +UCACCGGGUGAAAAAUCACCUA +>cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 +UCACCGGGUUAACAUCUACAGA +>cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 +UAUCACAGUUUACUUGCUGUCGC +>cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 +UGACUAGAGACACAUUCAGCU +>cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 +UGACUAGAGACACAUUCAGCU +>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 +UGUCAUGGAGUCGCUCUCUUCA 
+>cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 +UGUCAUGGAGGCGCUCUCUUCA +>cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 +UGAGGUAGGCUCAGUAGAUGCGA +>cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 +AAGCACCACGAGAAGCUGCAGA +>cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 +UGAUAUGUCUGGUAUUCUUGGG +>cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 +UACCCGUAGCUCCUAUCCAUGUU +>cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 +CACCCGUACAUAUGUUUCCGUGCU +>cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 +CACCCGUACAUUUGUUUCCGUGCU +>cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 +UACCCGUAAUCUUCAUAAUCCGAG diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/.shed.yml b/tool_collections/fastx_toolkit/fastq_quality_boxplot/.shed.yml new file mode 100644 index 00000000000..cd52b7af998 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_boxplot/.shed.yml @@ -0,0 +1,12 @@ +categories: +- Fastq Manipulation +- Graphics +- Statistics +description: Draw quality score boxplot +long_description: | + Creates a boxplot graph for the quality scores in the library. +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fastq_quality_boxplot +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastq_quality_boxplot +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot.xml b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot.xml new file mode 100644 index 00000000000..4eddd4ce35a --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot.xml @@ -0,0 +1,65 @@ + + + + macros.xml + + + fastq_quality_boxplot_graph.sh -t '$input.name' -i $input -o $output + + + + + + + + + + + + + + + + +**What it does** + +Creates a boxplot graph for the quality scores in the library. + +.. 
class:: infomark + +**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. + +----- + +**Output Examples** + +* Black horizontal lines are medians +* Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1) +* Whiskers show outlier at max. 1.5*IQR + + +An excellent quality library (median quality is 40 for almost all 36 cycles): + +.. image:: fastq_quality_boxplot_1.png + + +A relatively good quality library (median quality degrades towards later cycles): + +.. image:: fastq_quality_boxplot_2.png + +A low quality library (median drops quickly): + +.. image:: fastq_quality_boxplot_3.png + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + + + + diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_1.png b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_1.png new file mode 100644 index 00000000000..f721716a18c Binary files /dev/null and b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_1.png differ diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_2.png b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_2.png new file mode 100644 index 00000000000..0ce5baf5fda Binary files /dev/null and b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_2.png differ diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_3.png b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_3.png new file mode 100644 index 00000000000..e21c49be929 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastq_quality_boxplot/fastq_quality_boxplot_3.png differ diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/macros.xml b/tool_collections/fastx_toolkit/fastq_quality_boxplot/macros.xml new file 
mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_boxplot/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-in1.fastq b/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-in1.fastq new file mode 100644 index 00000000000..852c8122efe --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-in1.fastq @@ -0,0 +1,37 @@ +column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count Max_count +1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0 9 +2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0 9 +3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0 9 +4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0 9 +5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0 9 +6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0 9 +7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0 9 +8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0 9 +9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0 9 +10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0 9 +11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0 9 +12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0 9 +13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0 9 +14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0 9 +15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0 9 +16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0 9 +17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0 9 +18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0 9 +19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0 9 +20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0 9 +21 9 13 33 240 26.67 24 27 33 9 13 33 1 4 0 4 0 9 +22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1 9 +23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0 9 +24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0 9 +25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0 9 +26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0 9 +27 9 5 33 
227 25.22 21 26 33 12 5 33 3 4 1 1 0 9 +28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0 9 +29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0 9 +30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0 9 +31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0 9 +32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0 9 +33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0 9 +34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0 9 +35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0 9 +36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0 9 diff --git a/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-out1.png b/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-out1.png new file mode 100644 index 00000000000..afeff9d3b80 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastq_quality_boxplot/test-data/fastq_quality_boxplot-out1.png differ diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/.shed.yml b/tool_collections/fastx_toolkit/fastq_quality_converter/.shed.yml new file mode 100644 index 00000000000..2918e7b321c --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/.shed.yml @@ -0,0 +1,10 @@ +categories: +- Fastq Manipulation +description: Quality format converter (ASCII-Numeric) +long_description: | + Converts a Solexa FASTQ file to/from numeric or ASCII quality format. 
+homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fastq_quality_converter +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastq_quality_converter +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/fastq_quality_converter.xml b/tool_collections/fastx_toolkit/fastq_quality_converter/fastq_quality_converter.xml new file mode 100644 index 00000000000..9d8029c2035 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/fastq_quality_converter.xml @@ -0,0 +1,100 @@ + + (ASCII-Numeric) + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Converts a Solexa FASTQ file to/from numeric or ASCII quality format. + +.. class:: warningmark + +Re-scaling is **not** performed. (e.g. conversion from Phred scale to Solexa scale). + +----- + +FASTQ with Numeric quality scores:: + + @CSHL__2_FC042AGWWWXX:8:1:120:202 + ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC + +CSHL__2_FC042AGWWWXX:8:1:120:202 + 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 + @CSHL__2_FC042AGWWWXX:8:1:103:1185 + ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC + +CSHL__2_FC042AGWWWXX:8:1:103:1185 + 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 + + +FASTQ with ASCII quality scores:: + + @CSHL__2_FC042AGWWWXX:8:1:120:202 + ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC + +CSHL__2_FC042AGWWWXX:8:1:120:202 + hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH + @CSHL__2_FC042AGWWWXX:8:1:103:1185 + ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC + +CSHL__2_FC042AGWWWXX:8:1:103:1185 + hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/macros.xml b/tool_collections/fastx_toolkit/fastq_quality_converter/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.fastq b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.fastq new file mode 100644 index 00000000000..76c0d6c4f2a --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACACACACTCATCGTCGTCCCCCG ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACCC ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT ++CSHL_3_FC042AGLLWW:1:2:8:250 
+aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.out b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.out new file mode 100644 index 00000000000..73283203055 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1.out @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +33 33 34 30 22 30 33 21 29 32 33 33 30 33 26 33 33 33 34 34 24 5 26 33 34 33 33 33 33 33 33 33 33 29 29 32 +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC ++CSHL_3_FC042AGLLWW:1:2:7:33 +23 33 33 33 30 33 26 33 33 23 30 21 31 24 33 23 33 33 28 23 13 5 16 30 11 5 26 24 18 16 5 5 5 7 33 33 +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +33 31 13 30 33 28 21 33 33 33 31 13 31 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 22 28 26 21 7 21 21 18 +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +33 30 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 31 21 32 33 33 33 33 33 31 19 31 33 33 33 33 33 22 22 27 +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACACACACTCATCGTCGTCCCCCG ++CSHL_3_FC042AGLLWW:1:2:7:292 +34 33 34 33 33 33 33 33 33 33 21 13 33 33 33 33 33 33 33 33 33 33 33 28 24 5 21 21 5 16 31 29 21 5 18 5 +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACCC ++CSHL_3_FC042AGLLWW:1:2:7:1819 +33 28 28 17 22 22 22 12 33 33 12 15 5 24 21 23 21 21 5 11 21 21 12 5 13 21 5 21 21 11 21 12 9 17 13 21 +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +33 33 33 33 33 33 33 33 33 24 21 24 24 5 24 33 33 33 33 33 32 31 26 33 33 33 33 33 33 33 33 33 24 5 24 21 +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:8:624 +33 33 27 19 30 32 24 32 33 33 31 29 29 15 15 24 13 
21 30 31 27 13 21 31 33 33 33 33 33 33 33 33 33 33 33 33 +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT ++CSHL_3_FC042AGLLWW:1:2:8:250 +33 33 33 33 33 33 33 33 30 33 33 33 33 33 33 34 34 34 27 11 24 16 5 21 27 18 24 26 30 10 21 11 18 11 24 5 diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1a.out b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1a.out new file mode 100644 index 00000000000..76c0d6c4f2a --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv1a.out @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACACACACTCATCGTCGTCCCCCG ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACCC ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.fastq 
b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.fastq new file mode 100644 index 00000000000..e3caaf2d9f2 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.fastq @@ -0,0 +1,60 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1601:1525 +40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1713:528 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 
+GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1236:1157 +40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 +@CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1818:550 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 +@CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC0420AGLLKK:2:1:1764:391 +40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22 diff --git a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.out b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.out new file mode 100644 index 00000000000..56cb36d4af0 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2.out @@ -0,0 +1,60 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +hhhhhhhhhhhhhhhhhh`hhhhPTYIUehhP]Z^ +@CSHL_3_FC0420AGLLKK:2:1:136:448 
+GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +hhhhhhhhhhhhhhhhhhhhhhh;MQ\hhHQ[HMJ +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +hhhhhhhhhhhhhhhDhhZchfhFhh@CZ`[NKZK +@CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1601:1525 +hhhhhhhhhhhhchhLhh^^hhhLdWQXRVYOJbN +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +hhhhhhhhhhhhhhhhhPW\hUhIeMTUGKNNFWJ +@CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1713:528 +hhhhhhhhhhhhhhhhhhhhh`hLfOVTQNLJGVK +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +hhhhhhhhhhhhhhYhhhhhhh_hhKJWhMLQeQV +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +hhhhhhhhhhhhhhhhhhgV_hhL]V@GLHRGCRI +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +hhghhhhhhhhhDhhXbTaUd`h;hMUUZQRYNYU +@CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1236:1157 +hhhhhhhhhhhhhchhhhhahehhhRPTWV_ZJVS +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +hhhhhhhhhhhhhY[hec[hhQh;dKSOSPKLLWK +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +hhhhhhhhhhhhhhhhhhhhhh^hhXRfaZPWVPR +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +hhhhhhhhchghh[ThQbOhhhhO\QDLJJRNCNK +@CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1818:550 +hhhhhhhhhhhhhhd`hahhfeh\][VMTSQQMaR +@CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC0420AGLLKK:2:1:1764:391 +hhhhhhhhhhhahhhhhXhhhhhLhXNIVO]RKhV diff --git 
a/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2n.out b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2n.out new file mode 100644 index 00000000000..e3caaf2d9f2 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_converter/test-data/fastq_qual_conv2n.out @@ -0,0 +1,60 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1601:1525 +40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1713:528 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 
39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1236:1157 +40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 +@CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1818:550 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 +@CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC0420AGLLKK:2:1:1764:391 +40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22 diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/.shed.yml b/tool_collections/fastx_toolkit/fastq_quality_filter/.shed.yml new file mode 100644 index 00000000000..e4cd18d8c65 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/.shed.yml @@ -0,0 +1,10 @@ +categories: +- Fastq Manipulation +description: Filter by quality +long_description: | + This tool filters reads based on quality scores. 
+homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fastq_quality_filter +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastq_quality_filter +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/fastq_quality_filter.xml b/tool_collections/fastx_toolkit/fastq_quality_filter/fastq_quality_filter.xml new file mode 100644 index 00000000000..cc02f9046b6 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/fastq_quality_filter.xml @@ -0,0 +1,85 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool filters reads based on quality scores. + +.. class:: infomark + +Using **percent = 100** requires all cycles of all reads to be at least the quality cut-off value. + +.. class:: infomark + +Using **percent = 50** requires the median quality of the cycles (in each read) to be at least the quality cut-off value. + +-------- + +Quality score distribution (of all cycles) is calculated for each read. If it is lower than the quality cut-off value - the read is discarded. + + +**Example**:: + + @CSHL_4_FC042AGOOII:1:2:214:584 + GACAATAAAC + +CSHL_4_FC042AGOOII:1:2:214:584 + 30 30 30 30 30 30 30 30 20 10 + +Using **percent = 50** and **cut-off = 30** - This read will not be discarded (the median quality is equal to / higher than 30). + +Using **percent = 90** and **cut-off = 30** - This read will be discarded (90% of the cycles do not have quality equal to / higher than 30). + +Using **percent = 100** and **cut-off = 20** - This read will be discarded (not all cycles have quality equal to / higher than 20). + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + ..
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/macros.xml b/tool_collections/fastx_toolkit/fastq_quality_filter/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1.fastq b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1.fastq new file mode 100644 index 00000000000..ea3db077f2e --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaaaaaaaabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +aaaaaaaaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaZZZZZZUZUZaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 
diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1a.out b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1a.out new file mode 100644 index 00000000000..ea3db077f2e --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1a.out @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaaaaaaaabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +aaaaaaaaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaZZZZZZUZUZaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1b.out b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1b.out new file mode 100644 index 00000000000..ea3db077f2e --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_quality_filter/test-data/fastq_qual_filter1b.out @@ -0,0 +1,36 @@ 
+@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaaaaaaaabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +aaaaaaaaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaZZZZZZUZUZaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastq_to_fasta/.shed.yml b/tool_collections/fastx_toolkit/fastq_to_fasta/.shed.yml new file mode 100644 index 00000000000..84f26dfc3e8 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Convert Formats +description: FASTQ to FASTA converter +long_description: | + This tool converts data from FASTQ format to FASTA format. 
+homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fastq_to_fasta +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastq_to_fasta +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastq_to_fasta/fastq_to_fasta.xml b/tool_collections/fastx_toolkit/fastq_to_fasta/fastq_to_fasta.xml new file mode 100644 index 00000000000..0a26b09a835 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/fastq_to_fasta.xml @@ -0,0 +1,84 @@ + + converter from FASTX-toolkit + + macros.xml + + + gunzip -cf $input | fastq_to_fasta $SKIPN $RENAMESEQ -o $output -v +#if $input.ext == "fastqsanger": +-Q 33 +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool converts data from Solexa format to FASTA format (scroll down for format description). + +-------- + +**Example** + +The following data in Solexa-FASTQ format:: + + @CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +CSHL_4_FC042GAMMII_2_1_517_596 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Will be converted to FASTA (with 'rename sequence names' = NO):: + + >CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +Will be converted to FASTA (with 'rename sequence names' = YES):: + + >1 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastq_to_fasta/macros.xml b/tool_collections/fastx_toolkit/fastq_to_fasta/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1.fastq b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1.fastq new file mode 100644 index 00000000000..d3386dda038 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git 
a/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1a.out b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1a.out new file mode 100644 index 00000000000..c524fd4e12a --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1a.out @@ -0,0 +1,16 @@ +>CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT +>CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC +>CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT +>CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA +>CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA +>CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC +>CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG +>CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA diff --git a/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1b.out b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1b.out new file mode 100644 index 00000000000..c5b35f433d9 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastq_to_fasta/test-data/fastq_to_fasta1b.out @@ -0,0 +1,18 @@ +>1 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT +>2 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT +>3 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC +>4 +AATTATTTATTAAATTTTAATAATATGGGAGACACT +>5 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA +>6 +AATTCAAACCACCCCAACCCACACACAGAGATACAA +>7 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC +>8 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG +>9 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/.shed.yml b/tool_collections/fastx_toolkit/fastx_artifacts_filter/.shed.yml new file mode 100644 index 00000000000..e0d7e71a27e --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Fastq Manipulation +description: Remove sequencing artifacts 
+homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +long_description: | + This tool filters sequencing artifacts (reads with all but 3 identical bases). +name: fastx_artifacts_filter +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_artifacts_filter +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/fastx_artifacts_filter.xml b/tool_collections/fastx_toolkit/fastx_artifacts_filter/fastx_artifacts_filter.xml new file mode 100644 index 00000000000..2793aa35e7a --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/fastx_artifacts_filter.xml @@ -0,0 +1,94 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool filters sequencing artifacts (reads with all but 3 identical bases). + +-------- + +**The following is an example of sequences which will be filtered out**:: + + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA + 
AAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAA + AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAA + AAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAA + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/macros.xml b/tool_collections/fastx_toolkit/fastx_artifacts_filter/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.fasta b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.fasta new file mode 100644 index 00000000000..bd848fd4b27 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.fasta @@ -0,0 +1,24 @@ +>CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA +>CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA +>CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA +>CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA +>CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA +>CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.out b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.out new file mode 100644 index 00000000000..77833e34f4d --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts1.out @@ -0,0 +1,14 @@ 
+>CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA +>CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC +>CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA +>CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.fastq b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.fastq new file mode 100644 index 00000000000..e3caaf2d9f2 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.fastq @@ -0,0 +1,60 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1601:1525 +AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA ++CSHL_3_FC0420AGLLKK:2:1:1601:1525 +40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:1713:528 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
++CSHL_3_FC0420AGLLKK:2:1:1713:528 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:1236:1157 +AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1236:1157 +40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 +@CSHL_3_FC0420AGLLKK:2:1:1818:550 +AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA ++CSHL_3_FC0420AGLLKK:2:1:1818:550 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 +@CSHL_3_FC0420AGLLKK:2:1:1764:391 +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC0420AGLLKK:2:1:1764:391 +40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 
40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22 diff --git a/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.out b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.out new file mode 100644 index 00000000000..d82412b46e3 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_artifacts_filter/test-data/fastx_artifacts2.out @@ -0,0 +1,40 @@ +@CSHL_3_FC0420AGLLKK:2:1:233:1674 +GTTAGAGGGAATACACCCACTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:233:1674 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 +@CSHL_3_FC0420AGLLKK:2:1:136:448 +GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA ++CSHL_3_FC0420AGLLKK:2:1:136:448 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 +@CSHL_3_FC0420AGLLKK:2:1:237:1037 +GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:237:1037 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 +@CSHL_3_FC0420AGLLKK:2:1:1805:1464 +GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1805:1464 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 +@CSHL_3_FC0420AGLLKK:2:1:126:1087 +GAGATATTCGAATGCATCATCAGATGGCACCATCA ++CSHL_3_FC0420AGLLKK:2:1:126:1087 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 +@CSHL_3_FC0420AGLLKK:2:1:1488:1323 +GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:1488:1323 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 +@CSHL_3_FC0420AGLLKK:2:1:913:199 +GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:913:199 +40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 +@CSHL_3_FC0420AGLLKK:2:1:928:765 +GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC ++CSHL_3_FC0420AGLLKK:2:1:928:765 +40 40 40 40 
40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 +@CSHL_3_FC0420AGLLKK:2:1:727:1020 +GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA ++CSHL_3_FC0420AGLLKK:2:1:727:1020 +40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 +@CSHL_3_FC0420AGLLKK:2:1:758:1799 +GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC ++CSHL_3_FC0420AGLLKK:2:1:758:1799 +40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/.shed.yml b/tool_collections/fastx_toolkit/fastx_barcode_splitter/.shed.yml new file mode 100644 index 00000000000..69416b9584e --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Fastq Manipulation +description: Barcode Splitter +long_description: | + This tool splits a FASTQ file or a regular FASTA file into several files, using barcodes as the split criteria. 
+homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +name: fastx_barcode_splitter +owner: devteam +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_barcode_splitter +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/barcode_splitter_output_example.png b/tool_collections/fastx_toolkit/fastx_barcode_splitter/barcode_splitter_output_example.png new file mode 100644 index 00000000000..c0f8944cc8b Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_barcode_splitter/barcode_splitter_output_example.png differ diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter.xml b/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter.xml new file mode 100644 index 00000000000..9bdae3df631 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter.xml @@ -0,0 +1,75 @@ + + + + macros.xml + + + bash $__tool_directory__/fastx_barcode_splitter_galaxy_wrapper.sh '$BARCODE' '$input' "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > '$output' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria. + +-------- + +**Barcode file Format** + +Barcode files are simple text files. +Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character. +Example:: + + #This line is a comment (starts with a 'number' sign) + BC1 GATCT + BC2 ATCGT + BC3 GTGAT + BC4 TGTCT + +For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name). +Sequences matching the barcode will be stored in the appropriate file. 
+ +One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored. + +The output of this tool is an HTML file, displaying the split counts and the file locations. + +**Output Example** + +.. image:: barcode_splitter_output_example.png + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter_galaxy_wrapper.sh b/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter_galaxy_wrapper.sh new file mode 100755 index 00000000000..976404c99aa --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/fastx_barcode_splitter_galaxy_wrapper.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# FASTX-toolkit - FASTA/FASTQ preprocessing tools. +# Copyright (C) 2009 A. Gordon (gordon@cshl.edu) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# +#This is a shell script wrapper for 'fastx_barcode_splitter.pl' +# +# 1. Output files are saved at the dataset's files_path directory. +# +# 2. 'fastx_barcode_splitter.pl' outputs a textual table. 
+# This script turns it into pretty HTML with working URL +# (so lazy users can just click on the URLs and get their files) + +BARCODE_FILE="$1" +FASTQ_FILE="$2" +LIBNAME="$3" +OUTPUT_PATH="$4" +shift 4 +# The rest of the parameters are passed to the split program + +if [ "$OUTPUT_PATH" == "" ]; then + echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 + exit 1 +fi + +#Sanitize library name, make sure we can create a file with this name +LIBNAME=${LIBNAME//\.gz/} +LIBNAME=${LIBNAME//\.txt/} +LIBNAME=${LIBNAME//[^[:alnum:]]/_} + +if [ ! -r "$FASTQ_FILE" ]; then + echo "Error: Input file ($FASTQ_FILE) not found!" >&2 + exit 1 +fi +if [ ! -r "$BARCODE_FILE" ]; then + echo "Error: barcode file ($BARCODE_FILE) not found!" >&2 + exit 1 +fi +mkdir -p "$OUTPUT_PATH" +if [ ! -d "$OUTPUT_PATH" ]; then + echo "Error: failed to create output path '$OUTPUT_PATH'" >&2 + exit 1 +fi + +PUBLICURL="" +BASEPATH="$OUTPUT_PATH/" +#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__" +PREFIX="$BASEPATH""${LIBNAME}__" +SUFFIX=".txt" + +RESULTS=`zcat -f < "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"` +if [ $? != 0 ]; then + echo "error" +fi + +# +# Convert the textual tab-separated table into simple HTML table, +# with the local path replaces with a valid URL +echo "" +echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|\\1|" | sed ' +i
+s|\t||g +a<\/td><\/tr> +' +echo "

" +echo "

" diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/macros.xml b/tool_collections/fastx_toolkit/fastx_barcode_splitter/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.fastq b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.fastq new file mode 100644 index 00000000000..7cbcf9977bd --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.fastq @@ -0,0 +1,168 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GATCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTAGTAGTAGTAGA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTCTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACTCTCTATGTACA 
++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGTACGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCGTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +ATCTCGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT 
++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +GGAATGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TAGTTTCTCTATGTACA ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa +@CSHL_3_FC042AGLLWW:1:2:7:203 +TGTCTGAGTATACACAT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaa \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.out b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.out new file mode 100644 index 00000000000..62baea3e96c --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.out @@ -0,0 +1,24 @@ + + + + + + + + +

+

+BarcodeCountLocation +
+BC111fastx_barcode_splitter1_fastq__BC1.txt +
+BC212fastx_barcode_splitter1_fastq__BC2.txt +
+BC39fastx_barcode_splitter1_fastq__BC3.txt +
+BC41fastx_barcode_splitter1_fastq__BC4.txt +
+unmatched9fastx_barcode_splitter1_fastq__unmatched.txt +
+total42 +
diff --git a/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.txt b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.txt new file mode 100644 index 00000000000..8a7ba617b8d --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_barcode_splitter/test-data/fastx_barcode_splitter1.txt @@ -0,0 +1,4 @@ +BC1 GATCT +BC2 ATCGT +BC3 GTGAT +BC4 TGTCT \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_clipper/.shed.yml b/tool_collections/fastx_toolkit/fastx_clipper/.shed.yml new file mode 100644 index 00000000000..85d233f0e30 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Fastq Manipulation +description: Clip adapter sequences +long_description: | + This tool clips adapters from the 3'-end of the sequences in a FASTA/FASTQ file. +name: fastx_clipper +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_clipper +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper.xml b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper.xml new file mode 100644 index 00000000000..371d681ebb8 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper.xml @@ -0,0 +1,103 @@ + + adapter sequences + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + use this for hairpin barcoding. keep at 0 unless you know what you're doing. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool clips adapters from the 3'-end of the sequences in a FASTA/FASTQ file. + +-------- + + +**Clipping Illustration:** + +.. image:: fastx_clipper_illustration.png + +**Clipping Example:** + +..
image:: fastx_clipper_example.png + +**In the above example:** + +* Sequence no. 1 was discarded since it wasn't clipped (i.e. didn't contain the adapter sequence) (**Output** parameter). +* Sequence no. 5 was discarded --- its length (after clipping) was shorter than 15 nt (**Minimum Sequence Length** parameter). + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + diff --git a/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_example.png b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_example.png new file mode 100644 index 00000000000..d04d629d548 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_example.png differ diff --git a/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_illustration.png b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_illustration.png new file mode 100644 index 00000000000..5acf892fac9 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_clipper/fastx_clipper_illustration.png differ diff --git a/tool_collections/fastx_toolkit/fastx_clipper/macros.xml b/tool_collections/fastx_toolkit/fastx_clipper/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1.fastq b/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1.fastq new file mode 100644 index 00000000000..d3386dda038 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33
+Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1a.out b/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1a.out new file mode 100644 index 00000000000..9a52a29fe49 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/test-data/fastx_clipper1a.out @@ -0,0 +1,8 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATG ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabb diff --git a/tool_collections/fastx_toolkit/fastx_clipper/tool-data/fastx_clipper_sequences.txt.sample b/tool_collections/fastx_toolkit/fastx_clipper/tool-data/fastx_clipper_sequences.txt.sample new file mode 100644 index 00000000000..79f88f1c3a7 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_clipper/tool-data/fastx_clipper_sequences.txt.sample @@ -0,0 +1,13 @@ +# +# Adapter/Linker sequences for FASTX-Clipper tool. 
+# +# Format: +# Adapter Sequence Descriptive name +# +# Example: +# AAATTTGATAAGATA Our-Adapter +# +# Some adapters can be found here: +# http://seqanswers.com/forums/showthread.php?t=198 +# +#TGTAGGCC Dummy-Adapter (don't use me) diff --git a/tool_collections/fastx_toolkit/fastx_collapser/.shed.yml b/tool_collections/fastx_toolkit/fastx_collapser/.shed.yml new file mode 100644 index 00000000000..e3e7f6f4d81 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_collapser/.shed.yml @@ -0,0 +1,10 @@ +categories: +- Fasta Manipulation +description: Collapse sequences +long_description: | + This tool collapses identical sequences in a FASTA file into a single sequence. +name: fastx_collapser +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_collapser +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_collapser/fastx_collapser.xml b/tool_collections/fastx_toolkit/fastx_collapser/fastx_collapser.xml new file mode 100644 index 00000000000..c6b37b3bb5f --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_collapser/fastx_collapser.xml @@ -0,0 +1,92 @@ + + sequences + + macros.xml + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool collapses identical sequences in a FASTA file into a single sequence. + +-------- + +**Example** + +Example Input File (Sequence "ATAT" appears multiple times):: + + >CSHL_2_FC0042AGLLOO_1_1_605_414 + TGCG + >CSHL_2_FC0042AGLLOO_1_1_537_759 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_774_520 + TGGC + >CSHL_2_FC0042AGLLOO_1_1_742_502 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_781_514 + TGAG + >CSHL_2_FC0042AGLLOO_1_1_757_487 + TTCA + >CSHL_2_FC0042AGLLOO_1_1_903_769 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_724_499 + ATAT + +Example Output file:: + + >1-1 + TGCG + >2-4 + ATAT + >3-1 + TGGC + >4-1 + TGAG + >5-1 + TTCA + +.. 
class:: infomark + +Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded. + +The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value. + +The following output:: + + >2-4 + ATAT + +means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file. + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + diff --git a/tool_collections/fastx_toolkit/fastx_collapser/macros.xml b/tool_collections/fastx_toolkit/fastx_collapser/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_collapser/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.fasta b/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.fasta new file mode 100644 index 00000000000..6720c254408 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.fasta @@ -0,0 +1,84 @@ +>1 +TGTATTTACAATGACTAGAAA +>2 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>3 +AGTACAAGGACATGC +>4 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>5 +AGTACAAGGACATGC +>6 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>7 +AGTACAAGGACATGC +>8 +AGTACAAGGACATGC +>9 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>10 +AGTACAAGGACATGC +>11 +AGTACAAGGACATGC +>12 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>13 +CGATTGCCGAAGTCTACCA +>14 +AGTACAAGGACATGC +>15 +CCTTGTAGTGGATTCTGATGA +>16 +AGTACAAGGACATGC +>17 +AGTACAAGGACATGC +>18 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>19 +AGTACAAGGACATGC +>20 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>21 +AGTACAAGGACATGC +>22 +AGTACAAGGACATGC +>23 +CTGCTGCGATCGGTGTGC +>24 +AGTACAAGGACATGC +>25 +ACCATTCGAGCATAC +>26 +AGTACAAGGACATGC +>27 +TCAAATTCTAGATTTTTACGG +>28 +AGTACAAGGACATGC +>29 +TGATTTCCAGAGCCAAT +>30 
+ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>31 +TTACCTCACGATATTGTAATA +>32 +ATGACTTCATCGTCCACCCTTTAGAACT +>33 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>34 +TTCAACGCCGCCGTGAAC +>35 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>36 +CTGCTGCGATCGGTGTGC +>37 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>38 +TTCAACGCCGCCGTGAAC +>39 +TTCAACGCCGCCGTGAAC +>40 +CTGCTGCGATCGGTGTGC +>41 +TTCAACGCCGCCGTGAAC +>42 +TTCAACGCCGCCGTGAAC diff --git a/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.out b/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.out new file mode 100644 index 00000000000..22f9d963491 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_collapser/test-data/fasta_collapser1.out @@ -0,0 +1,24 @@ +>1-15 +AGTACAAGGACATGC +>2-11 +ATTGCTGCTCGGATGGTCCGGCTGTGCACAC +>3-5 +TTCAACGCCGCCGTGAAC +>4-3 +CTGCTGCGATCGGTGTGC +>5-1 +ACCATTCGAGCATAC +>6-1 +TGTATTTACAATGACTAGAAA +>7-1 +TGATTTCCAGAGCCAAT +>8-1 +CGATTGCCGAAGTCTACCA +>9-1 +TCAAATTCTAGATTTTTACGG +>10-1 +TTACCTCACGATATTGTAATA +>11-1 +CCTTGTAGTGGATTCTGATGA +>12-1 +ATGACTTCATCGTCCACCCTTTAGAACT diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/.shed.yml b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/.shed.yml new file mode 100644 index 00000000000..e7687e1dfc5 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fastq Manipulation +- Graphics +description: Draw nucleotides distribution chart +long_description: | + Creates a stacked-histogram graph for the nucleotide distribution in the FASTQ library. 
+name: fastx_nucleotides_distribution +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_nucleotides_distribution +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_1.png b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_1.png new file mode 100644 index 00000000000..5727acd65c0 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_1.png differ diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_2.png b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_2.png new file mode 100644 index 00000000000..cdc52a9c80b Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_2.png differ diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_3.png b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_3.png new file mode 100644 index 00000000000..39d679657ec Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_3.png differ diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_4.png b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_4.png new file mode 100644 index 00000000000..c6ab9b41660 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastq_nucleotides_distribution_4.png differ diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastx_nucleotides_distribution.xml 
b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastx_nucleotides_distribution.xml new file mode 100644 index 00000000000..616823bf410 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/fastx_nucleotides_distribution.xml @@ -0,0 +1,62 @@ + + + + macros.xml + + + fastx_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o $output + + + + + + + + + + + + + + + + + +**What it does** + +Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library. + +.. class:: infomark + +**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. + +----- + +**Output Examples** + +The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT** + +.. image:: fastq_nucleotides_distribution_1.png + +In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...** + +.. image:: fastq_nucleotides_distribution_2.png + +The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem): + +.. image:: fastq_nucleotides_distribution_3.png + +But most of the time, the chart will look rather random: + +.. image:: fastq_nucleotides_distribution_4.png + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + + diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/macros.xml b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-in1.txt b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-in1.txt new file mode 100644 index 00000000000..852c8122efe --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-in1.txt @@ -0,0 +1,37 @@ +column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count Max_count +1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0 9 +2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0 9 +3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0 9 +4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0 9 +5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0 9 +6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0 9 +7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0 9 +8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0 9 +9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0 9 +10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0 9 +11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0 9 +12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0 9 +13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0 9 +14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0 9 +15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0 9 +16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0 9 +17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0 9 +18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0 9 +19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0 9 +20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0 9 +21 9 13 33 240 26.67 24 27 33 9 
13 33 1 4 0 4 0 9 +22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1 9 +23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0 9 +24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0 9 +25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0 9 +26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0 9 +27 9 5 33 227 25.22 21 26 33 12 5 33 3 4 1 1 0 9 +28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0 9 +29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0 9 +30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0 9 +31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0 9 +32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0 9 +33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0 9 +34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0 9 +35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0 9 +36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0 9 diff --git a/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-out1.png b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-out1.png new file mode 100644 index 00000000000..aeac7234748 Binary files /dev/null and b/tool_collections/fastx_toolkit/fastx_nucleotides_distribution/test-data/fastx_nucleotides_distribution-out1.png differ diff --git a/tool_collections/fastx_toolkit/fastx_quality_statistics/.shed.yml b/tool_collections/fastx_toolkit/fastx_quality_statistics/.shed.yml new file mode 100644 index 00000000000..579d1305270 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_quality_statistics/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fastq Manipulation +- Statistics +description: Compute quality statistics +long_description: | + Creates quality statistics report for the given FASTQ library. 
+name: fastx_quality_statistics +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_quality_statistics +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_quality_statistics/fastx_quality_statistics.xml b/tool_collections/fastx_toolkit/fastx_quality_statistics/fastx_quality_statistics.xml new file mode 100644 index 00000000000..3cf8aaa04cc --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_quality_statistics/fastx_quality_statistics.xml @@ -0,0 +1,73 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + +**What it does** + +Creates quality statistics report for the given Solexa/FASTQ library. + +.. class:: infomark + +**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools. + +----- + +**The output file will contain the following fields:** + +* column = column number (1 to 36 for a 36-cycles read Solexa file) +* count = number of bases found in this column. +* min = Lowest quality score value found in this column. +* max = Highest quality score value found in this column. +* sum = Sum of quality score values for this column. +* mean = Mean quality score value for this column. +* Q1 = 1st quartile quality score. +* med = Median quality score. +* Q3 = 3rd quartile quality score. +* IQR = Inter-Quartile range (Q3-Q1). +* lW = 'Left-Whisker' value (for boxplotting). +* rW = 'Right-Whisker' value (for boxplotting). +* A_Count = Count of 'A' nucleotides found in this column. +* C_Count = Count of 'C' nucleotides found in this column. +* G_Count = Count of 'G' nucleotides found in this column. +* T_Count = Count of 'T' nucleotides found in this column. +* N_Count = Count of 'N' nucleotides found in this column. 
+ + +For example:: + + 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0 + 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0 + 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0 + 4 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0 + 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastx_quality_statistics/macros.xml b/tool_collections/fastx_toolkit/fastx_quality_statistics/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_quality_statistics/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.fastq b/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.fastq new file mode 100644 index 00000000000..d1bc160aa5d --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +BBC?7?B6>ABB?B;BBBCC9&;BCBBBBBBBB>>A +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +8BBB?B;BB8?6@9B8BB=8.&1?,&;931&&&(BB +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +B@.?B=6BBB@.@BBBBBBBBBBBBBBB7=;6(663 +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +B?BBBBBBBBBBBBBBB@6ABBBBB@4@BBBBB77< +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +CBCBBBBBBB6.BBBBBBBBBBB=9&66&1@>6&3& +@CSHL_3_FC042AGLLWW:1:2:7:1819 
+AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +B==2777-BB-0&96866&,66-&.6&66,6-*2.6 +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +BBBBBBBBB9699&9BBBBBA@;BBBBBBBBB9&96 +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +BB<4?A9ABB@>>009.6?@<.6@BBBBBBBBBBBB +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +BBBBBBBB?BBBBBBCCC<,91&6<39;?+6,3,9& diff --git a/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.out b/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.out new file mode 100644 index 00000000000..852c8122efe --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_quality_statistics/test-data/fastq_stats1.out @@ -0,0 +1,37 @@ +column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count Max_count +1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0 9 +2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0 9 +3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0 9 +4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0 9 +5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0 9 +6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0 9 +7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0 9 +8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0 9 +9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0 9 +10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0 9 +11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0 9 +12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0 9 +13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0 9 +14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0 9 +15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0 9 +16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0 9 +17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0 9 +18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0 9 +19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0 9 +20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0 9 +21 9 13 
33 240 26.67 24 27 33 9 13 33 1 4 0 4 0 9 +22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1 9 +23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0 9 +24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0 9 +25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0 9 +26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0 9 +27 9 5 33 227 25.22 21 26 33 12 5 33 3 4 1 1 0 9 +28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0 9 +29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0 9 +30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0 9 +31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0 9 +32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0 9 +33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0 9 +34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0 9 +35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0 9 +36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0 9 diff --git a/tool_collections/fastx_toolkit/fastx_renamer/.shed.yml b/tool_collections/fastx_toolkit/fastx_renamer/.shed.yml new file mode 100644 index 00000000000..0412d4c4a27 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_renamer/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Fastq Manipulation +description: Rename sequences +long_description: | + This tool renames the sequence identifiers in a FASTQ/A file. +name: fastx_renamer +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_renamer +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_renamer/fastx_renamer.xml b/tool_collections/fastx_toolkit/fastx_renamer/fastx_renamer.xml new file mode 100644 index 00000000000..0167d61c877 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_renamer/fastx_renamer.xml @@ -0,0 +1,77 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool renames the sequence identifiers in a FASTQ/A file. + +.. 
class:: infomark + +Use this tool at the beginning of your workflow, as a way to keep the original sequence (before trimming, clipping, barcode-removal, etc). + +-------- + +**Example** + +The following Solexa-FASTQ file:: + + @CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +CSHL_4_FC042GAMMII_2_1_517_596 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Renamed to **nucleotides sequence**:: + + @GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Renamed to **numeric counter**:: + + @1 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +1 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastx_renamer/macros.xml b/tool_collections/fastx_toolkit/fastx_renamer/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_renamer/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-in1.fastq b/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-in1.fastq new file mode 100644 index 00000000000..d3386dda038 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-in1.fastq @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 
+GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-out1.fastq b/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-out1.fastq new file mode 100644 index 00000000000..80c7c79e4c0 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_renamer/test-data/fastx_renamer-out1.fastq @@ -0,0 +1,36 @@ +@GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CAATGCCTCCAATTGGTTAATCCCCCTATATATACT +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CAATGCCTCCAATTGGTTAATCCCCCTATATATACT +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@AATTATTTATTAAATTTTAATAATATGGGAGACACT +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++AATTATTTATTAAATTTTAATAATATGGGAGACACT +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@GGAGAAATACACACAATTGGTTAATCCCCCTATATA +GGAGAAATACACACAATTGGTTAATCCCCCTATATA 
++GGAGAAATACACACAATTGGTTAATCCCCCTATATA +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@AATTCAAACCACCCCAACCCACACACAGAGATACAA +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++AATTCAAACCACCCCAACCCACACACAGAGATACAA +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/.shed.yml b/tool_collections/fastx_toolkit/fastx_reverse_complement/.shed.yml new file mode 100644 index 00000000000..705e0825b16 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/.shed.yml @@ -0,0 +1,12 @@ +categories: +- Fastq Manipulation +- Fasta Manipulation +description: Reverse-Complement +long_description: | + This tool reverse-complements each sequence in a library. If the + library is a FASTQ, the quality-scores are also reversed. 
+name: fastx_reverse_complement +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_reverse_complement +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/fastx_reverse_complement.xml b/tool_collections/fastx_toolkit/fastx_reverse_complement/fastx_reverse_complement.xml new file mode 100644 index 00000000000..1533ff51e76 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/fastx_reverse_complement.xml @@ -0,0 +1,65 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool reverse-complements each sequence in a library. +If the library is a FASTQ, the quality-scores are also reversed. + +-------- + +**Example** + +Input FASTQ file:: + + @CSHL_1_FC42AGWWWXX:8:1:3:740 + TGTCTGTAGCCTCNTCCTTGTAATTCAAAGNNGGTA + +CSHL_1_FC42AGWWWXX:8:1:3:740 + 33 33 33 34 33 33 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 27 21 27 33 32 31 29 26 24 5 5 15 17 27 26 + + +Output FASTQ file:: + + @CSHL_1_FC42AGWWWXX:8:1:3:740 + TACCNNCTTTGAATTACAAGGANGAGGCTACAGACA + +CSHL_1_FC42AGWWWXX:8:1:3:740 + 26 27 17 15 5 5 24 26 29 31 32 33 27 21 27 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 33 33 34 33 33 33 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/macros.xml b/tool_collections/fastx_toolkit/fastx_reverse_complement/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp1.fasta b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp1.fasta new file mode 100644 index 00000000000..6767ab70655 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp1.fasta @@ -0,0 +1,4 @@ +>CSHL__2_FC042NGABCD:8:1:120:202 +ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC +>CSHL__2_FC042NGABCD:8:1:103:1185 +ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp2.fastq b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp2.fastq new file mode 100644 index 00000000000..c9bb2c58d03 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_rev_comp2.fastq @@ -0,0 +1,8 @@ +@CSHL__2_FC042NGABCD:8:1:120:202 +ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC ++CSHL__2_FC042NGABCD:8:1:120:202 +40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 +@CSHL__2_FC042NGABCD:8:1:103:1185 +ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC ++CSHL__2_FC042NGABCD:8:1:103:1185 +40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement1.out b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement1.out new file mode 100644 index 00000000000..0707c85dc3f --- /dev/null +++ 
b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement1.out @@ -0,0 +1,4 @@ +>CSHL__2_FC042NGABCD:8:1:120:202 +GCAGAAAACGGCATACTAGCTCTTCCGATCTATCGT +>CSHL__2_FC042NGABCD:8:1:103:1185 +GAAGACGGTAAACGAGCTCTGCCGATCTATCGTGAT diff --git a/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement2.out b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement2.out new file mode 100644 index 00000000000..65997ea46c2 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_reverse_complement/test-data/fastx_reverse_complement2.out @@ -0,0 +1,8 @@ +@CSHL__2_FC042NGABCD:8:1:120:202 +GCAGAAAACGGCATACTAGCTCTTCCGATCTATCGT ++CSHL__2_FC042NGABCD:8:1:120:202 +8 10 21 -1 10 11 -1 7 3 1 8 40 27 14 40 30 -1 40 20 40 25 40 40 28 40 40 6 40 40 40 40 20 40 40 40 40 +@CSHL__2_FC042NGABCD:8:1:103:1185 +GAAGACGGTAAACGAGCTCTGCCGATCTATCGTGAT ++CSHL__2_FC042NGABCD:8:1:103:1185 +2 30 25 4 2 8 0 10 3 23 12 22 34 15 36 8 14 17 22 9 0 40 22 30 32 40 40 40 31 33 35 40 40 40 40 40 diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/.shed.yml b/tool_collections/fastx_toolkit/fastx_trimmer/.shed.yml new file mode 100644 index 00000000000..fe23448138d --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/.shed.yml @@ -0,0 +1,11 @@ +categories: +- Fasta Manipulation +- Fastq Manipulation +description: Trim sequences +long_description: | + This tool trims (cut bases from) sequences in a FASTA/Q file. 
+name: fastx_trimmer +owner: devteam +homepage_url: http://hannonlab.cshl.edu/fastx_toolkit/ +remote_repository_url: https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/fastx_toolkit/fastx_trimmer +type: unrestricted diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/fastx_trimmer.xml b/tool_collections/fastx_toolkit/fastx_trimmer/fastx_trimmer.xml new file mode 100644 index 00000000000..952d32dd106 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/fastx_trimmer.xml @@ -0,0 +1,85 @@ + + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool trims (cut bases from) sequences in a FASTA/Q file. + +-------- + +**Example** + +Input Fasta file (with 36 bases in each sequences):: + + >1-1 + TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC + >2-1 + CAGCGAGGCTTTAATGCCATTTGGCTGTAGGCACCA + + +Trimming with First=1 and Last=21, we get a FASTA file with 21 bases in each sequences (starting from the first base):: + + >1-1 + TATGGTCAGAAACCATATGCA + >2-1 + CAGCGAGGCTTTAATGCCATT + +Trimming with First=6 and Last=10, will generate a FASTA file with 5 bases (bases 6,7,8,9,10) in each sequences:: + + >1-1 + TCAGA + >2-1 + AGGCT + + ------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/macros.xml b/tool_collections/fastx_toolkit/fastx_trimmer/macros.xml new file mode 120000 index 00000000000..0c6ff6c3756 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/macros.xml @@ -0,0 +1 @@ +../macros.xml \ No newline at end of file diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.fasta b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.fasta new file mode 100644 index 00000000000..6767ab70655 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.fasta @@ -0,0 +1,4 @@ +>CSHL__2_FC042NGABCD:8:1:120:202 +ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC +>CSHL__2_FC042NGABCD:8:1:103:1185 +ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.out b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.out new file mode 100644 index 00000000000..94f4a66b037 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer1.out @@ -0,0 +1,4 @@ +>CSHL__2_FC042NGABCD:8:1:120:202 +TAGATCGGAAGAGCTAGTATGCCGTTTTCTGC +>CSHL__2_FC042NGABCD:8:1:103:1185 +CGATAGATCGGCAGAGCTCGTTTACCGTCTTC diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.fastq b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.fastq new file mode 100644 index 00000000000..c9bb2c58d03 --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.fastq @@ -0,0 +1,8 @@ +@CSHL__2_FC042NGABCD:8:1:120:202 +ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC ++CSHL__2_FC042NGABCD:8:1:120:202 +40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 +@CSHL__2_FC042NGABCD:8:1:103:1185 +ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC ++CSHL__2_FC042NGABCD:8:1:103:1185 +40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 
9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 diff --git a/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.out b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.out new file mode 100644 index 00000000000..bf2127ccfbe --- /dev/null +++ b/tool_collections/fastx_toolkit/fastx_trimmer/test-data/fastx_trimmer2.out @@ -0,0 +1,8 @@ +@CSHL__2_FC042NGABCD:8:1:120:202 +ACGATAGATCGGAAGAGCTAGTATGCC ++CSHL__2_FC042NGABCD:8:1:120:202 +40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 +@CSHL__2_FC042NGABCD:8:1:103:1185 +ATCACGATAGATCGGCAGAGCTCGTTT ++CSHL__2_FC042NGABCD:8:1:103:1185 +40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 0 9 22 17 14 8 36 15 34 22 12 23 diff --git a/tool_collections/fastx_toolkit/macros.xml b/tool_collections/fastx_toolkit/macros.xml new file mode 100644 index 00000000000..4592168e4e1 --- /dev/null +++ b/tool_collections/fastx_toolkit/macros.xml @@ -0,0 +1,21 @@ + + + + + fastx_toolkit + + + + + + + @UNPUBLISHED{agordon, + author = "Assaf Gordon", + title = "FASTQ/A short-reads pre-processing tools", + year = "2010", + note = "http://hannonlab.cshl.edu/fastx_toolkit/", + url = "http://hannonlab.cshl.edu/fastx_toolkit/"} + + + + \ No newline at end of file