#!/usr/bin/env perl
#
# Split every record of a FASTA file at each stretch of unknown bases (N).
#
# Each record is cut wherever one or more consecutive 'N' characters occur.
# Every resulting piece whose length is at least -min is written to the
# output file as its own record, named "<original header>_part<k>", where
# <k> is the 1-based position of the piece within the original sequence.

use strict;
use warnings;
use FileHandle;
use Getopt::Long;
use File::Basename;

my $A_IN   = "";
my $A_OUT  = "";
my $A_MIN  = 0;
my $A_HELP = 0;

our $PRG_NAME = basename($0);

my $result = GetOptions(
    "i=s"   => \$A_IN,
    "o=s"   => \$A_OUT,
    "min=i" => \$A_MIN,    # used numerically below, so require an integer
    "h"     => \$A_HELP,
);

if (!$result || $A_HELP) {
    usage();
}

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode or a pipe.
open(my $in, '<', $A_IN)
    or usage("[Error] Cannot open $A_IN in [r] mode: $!");
open(my $out, '>', $A_OUT)
    or usage("[Error] Cannot open $A_OUT in [w] mode: $!");

my $seen_header = 0;     # true once the first '>' line has been read
my $name        = "";    # header line of the record being accumulated
my $seq         = "";    # concatenated sequence lines of that record

while (my $line = <$in>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings

    if ($line =~ m/^>/) {
        # A new header closes the record accumulated so far.
        write_parts($out, $name, $seq, $A_MIN) if $seen_header;
        $seen_header = 1;
        $name        = $line;
        $seq         = "";    # also discards any text found before the first header
    }
    else {
        $seq .= $line;
    }
}

# Flush the last record under its OWN header.
write_parts($out, $name, $seq, $A_MIN) if $seen_header;

close($in);
close($out) or die "[Error] Cannot close $A_OUT: $!\n";

# write_parts($out, $name, $seq, $min)
#
# Cut $seq at every run of one or more 'N' characters and print each piece
# of at least $min characters to the handle $out as a two-line record:
# "<$name>_part<k>" followed by the piece. <k> counts every piece produced
# by the split (including skipped ones), so numbering reflects the position
# within the original sequence. Empty pieces (the sequence starts with N)
# are never printed. Dies if writing fails.
sub write_parts {
    my ($out, $name, $seq, $min) = @_;

    my $count = 0;
    for my $part (split /N+/, $seq) {
        $count++;
        next if $part eq "";
        next if length($part) < $min;
        print {$out} $name, "_part", $count, "\n", $part, "\n"
            or die "[Error] Cannot write to output file: $!\n";
    }
    return;
}

# usage([$message])
#
# Print the optional error $message followed by the help text to STDERR,
# then terminate the program with exit status 1.
sub usage {
    my ($message) = @_;

    my $usage = "
$PRG_NAME - split input sequence at each stretch of unknown bases (N).

    -i   : input file
    -o   : output file
    -min : minimum contig size, default 0
    -h   : this help
\n";

    warn "$message\n" if defined $message && length $message;
    warn $usage;
    exit 1;
}