Module ExtractFastaFromSAM
[hide private]
[frames] | no frames]

Source Code for Module ExtractFastaFromSAM

 1  #!/usr/bin/python 
 2  # -*- coding: iso-8859-1 -*- 
 3  from DIGEST_functions import * 
 4   
 5   
 6  __doc__=""" 
 7  Extract overlapped paired-end reads and unmapped paired-end reads from a SAM file sorted by reads names. 
 8   
 9  @requires: DIGEST_functions.py (PYTHONPATH) 
10   
11  Input : SAM file sorted by reads names 
12  Output : overlapped paired-end reads and unmapped paired-end reads in FASTA format 
13  """ 
14   
15   
16 -def get_parser():
17 18 parser = argparse.ArgumentParser(description='Extract 2 FASTA from SAM file') 19 20 parser.add_argument('-sam', action="store", dest='SAMfile', 21 type=str, help='SAM file') 22 23 parser.add_argument('-out1', action="store", dest='overlapFile', 24 type=str, help='output prefix for overlapped reads') 25 26 parser.add_argument('-out2', action="store", dest='unmapFile', 27 type=str, help='output prefix for unmapped reads') 28 29 return parser
30 31
32 -def main():
33 34 parser=get_parser() 35 IDliste = [] 36 37 if len(sys.argv)==1: 38 parser.print_help() 39 sys.exit(1) 40 41 Arguments=parser.parse_args() 42 43 samReader=open(Arguments.SAMfile,'rb') 44 overlapFile1 = open(Arguments.overlapFile + "_p1.fasta",'wb') 45 overlapFile2 = open(Arguments.overlapFile + "_p2.fasta",'wb') 46 unmapFile1 = open(Arguments.unmapFile + "_p1.fasta",'wb') 47 unmapFile2 = open(Arguments.unmapFile + "_p2.fasta",'wb') 48 49 flag = True 50 lastSequence = [] 51 map = False 52 53 for ligne in samReader: 54 55 if(ligne[0]!='@'): #/!\ 56 ligne=re.split(r'\t+', ligne) 57 alignment = alignmentSAM(ligne) 58 59 if(flag):#1st read of the pair 60 flag = False 61 lastSequence = alignment.QuerySequence 62 if(alignment.CIGAR.Code!="0"): 63 map = True 64 else: 65 map = False 66 67 else:#2nd read of the pair 68 flag = True 69 if(alignment.CIGAR.Code=="0" and map==False): 70 unmapFile1.write(">" + alignment.Query + "/1\n" + lastSequence + "\n") 71 unmapFile2.write(">" + alignment.Query + "/2\n" + alignment.QuerySequence + "\n") 72 elif((alignment.CIGAR.Code=="0" and map==True) or (alignment.CIGAR.Code!="0" and map==False)): 73 overlapFile1.write(">" + alignment.Query + "/1\n" + lastSequence + "\n") 74 overlapFile2.write(">" + alignment.Query + "/2\n" + alignment.QuerySequence + "\n") 75 76 overlapFile1.close() 77 overlapFile2.close() 78 unmapFile1.close() 79 unmapFile2.close()
80 81 if __name__ == "__main__": 82 main() 83