Module ExtractFastaFromSAM
|
|
1
2
3 from DIGEST_functions import *
4
5
6 __doc__="""
7 Extract overlapped paired-end reads and unmapped paired-end reads from a SAM file sorted by reads names.
8
9 @requires: DIGEST_functions.py (PYTHONPATH)
10
11 Input : SAM file sorted by reads names
12 Output : overlapped paired-end reads and unmapped paired-end reads in FASTA format
13 """
14
15
17
18 parser = argparse.ArgumentParser(description='Extract 2 FASTA from SAM file')
19
20 parser.add_argument('-sam', action="store", dest='SAMfile',
21 type=str, help='SAM file')
22
23 parser.add_argument('-out1', action="store", dest='overlapFile',
24 type=str, help='output prefix for overlapped reads')
25
26 parser.add_argument('-out2', action="store", dest='unmapFile',
27 type=str, help='output prefix for unmapped reads')
28
29 return parser
30
31
33
34 parser=get_parser()
35 IDliste = []
36
37 if len(sys.argv)==1:
38 parser.print_help()
39 sys.exit(1)
40
41 Arguments=parser.parse_args()
42
43 samReader=open(Arguments.SAMfile,'rb')
44 overlapFile1 = open(Arguments.overlapFile + "_p1.fasta",'wb')
45 overlapFile2 = open(Arguments.overlapFile + "_p2.fasta",'wb')
46 unmapFile1 = open(Arguments.unmapFile + "_p1.fasta",'wb')
47 unmapFile2 = open(Arguments.unmapFile + "_p2.fasta",'wb')
48
49 flag = True
50 lastSequence = []
51 map = False
52
53 for ligne in samReader:
54
55 if(ligne[0]!='@'):
56 ligne=re.split(r'\t+', ligne)
57 alignment = alignmentSAM(ligne)
58
59 if(flag):
60 flag = False
61 lastSequence = alignment.QuerySequence
62 if(alignment.CIGAR.Code!="0"):
63 map = True
64 else:
65 map = False
66
67 else:
68 flag = True
69 if(alignment.CIGAR.Code=="0" and map==False):
70 unmapFile1.write(">" + alignment.Query + "/1\n" + lastSequence + "\n")
71 unmapFile2.write(">" + alignment.Query + "/2\n" + alignment.QuerySequence + "\n")
72 elif((alignment.CIGAR.Code=="0" and map==True) or (alignment.CIGAR.Code!="0" and map==False)):
73 overlapFile1.write(">" + alignment.Query + "/1\n" + lastSequence + "\n")
74 overlapFile2.write(">" + alignment.Query + "/2\n" + alignment.QuerySequence + "\n")
75
76 overlapFile1.close()
77 overlapFile2.close()
78 unmapFile1.close()
79 unmapFile2.close()
80
81 if __name__ == "__main__":
82 main()
83