-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPart4.java
178 lines (151 loc) · 6.07 KB
/
Part4.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
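/**
 * Finds all genes in a DNA strand and reports: the number of genes longer than 60 characters,
 * the number of genes with a C-G ratio above 0.35, the longest gene and its length,
 * and the total number of genes found.
 *
 * @author ShreyamDuttagupta
 */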
import edu.duke.*;
/**
 * Gene-finding helpers: extracts genes (a start codon followed, in frame, by a stop codon)
 * from an upper-case DNA string and prints summary statistics about the genes found.
 */
public class Part4 {
    /** Genes strictly longer than this many characters are counted as "long". */
    private static final int LONG_GENE_LENGTH = 60;

    /** Genes whose C-G ratio is strictly greater than this value are counted. */
    private static final double CG_RATIO_THRESHOLD = 0.35;

    /** Codon that marks the beginning of a gene. */
    private static final String START_CODON = "ATG";

    /** Number of bases in one codon. */
    private static final int CODON_LENGTH = 3;

    /**
     * Prints summary statistics for the genes held in {@code sr}: how many are longer than
     * 60 characters, how many have a C-G ratio above 0.35, the longest gene and its length,
     * and the total number of genes.
     *
     * @param sr the genes to analyse
     */
    public void processGenes(StorageResource sr) {
        int geneCount = 0;
        int geneCountAbove60 = 0;
        int cgRatioCount = 0;
        int geneLength = 0;
        String longestGene = "";
        for (String s : sr.data()) {
            geneCount++;
            if (s.length() > LONG_GENE_LENGTH) {
                geneCountAbove60++;
            }
            if (cgRatio(s) > CG_RATIO_THRESHOLD) {
                cgRatioCount++;
            }
            // Strict comparison: on a tie the first gene of that length is kept.
            if (s.length() > geneLength) {
                geneLength = s.length();
                longestGene = s;
            }
        }
        System.out.println("The number of strings in sr longer than 60 characters: " + geneCountAbove60);
        System.out.println("The number of strings in sr with C-G-ratio higher than 0.35: " + cgRatioCount);
        System.out.println("Length of longest gene: " + geneLength);
        System.out.println("Longest gene is: " + longestGene);
        System.out.println("Number of genes in the storage list is: " + geneCount);
    }

    /**
     * Manual test driver: for each sample strand, prints the strand, every gene found in it,
     * and the statistics reported by {@link #processGenes(StorageResource)}.
     */
    public void testProcessGenes() {
        String[] testStrands = {
            "ACAAGATGCCCTAAGTCCCCCGGCCTC"
                + "CTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAAGGTAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCATGGCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA",
            "CATGTAGTAAAATGACCTGATAGATATGCTTGTATGCTATGAAAATTAAGTGAAATGACCCA", // multiple stop codons
            "CATCATT", // no ATG, TAA
            "ATGCATCCCCCCCCCCCCGGGGGGGGGAATAGAGAA", // no TAA
            "CATCCCCCCCCCCCCGGGGGGGGGAATAGAGAATAAGGGGGGCCCCCCACCCCTGCCCCC" // no ATG
        };
        for (int i = 0; i < testStrands.length; i++) {
            String dna = testStrands[i];
            System.out.println("Dna " + (i + 1) + " is " + dna);
            StorageResource dnaList = getAllGenes(dna);
            for (String s : dnaList.data()) {
                System.out.println("Gene is " + s);
            }
            processGenes(dnaList);
        }
    }

    /**
     * Manual test driver: reads the DNA in {@code GRch38dnapart.fa}, upper-cases it, prints it,
     * and reports statistics for all genes found in it.
     */
    public void testProcessGenesFromFile() {
        FileResource fr = new FileResource("GRch38dnapart.fa");
        // Gene finding matches upper-case codons only, so normalise the case first.
        String dna = fr.asString().toUpperCase();
        System.out.println("dna is " + dna);
        StorageResource geneList = getAllGenes(dna);
        processGenes(geneList);
    }

    /**
     * Computes the fraction of characters in {@code dna} that are {@code 'C'} or {@code 'G'}.
     *
     * @param dna the DNA string (upper-case bases are the only ones counted)
     * @return the number of C and G characters divided by the length of {@code dna},
     *         or {@code 0.0} when {@code dna} is empty
     */
    public double cgRatio(String dna) {
        // Guard against 0/0, which would otherwise produce NaN.
        if (dna.isEmpty()) {
            return 0.0;
        }
        int cgCount = 0;
        for (int i = 0; i < dna.length(); i++) {
            char base = dna.charAt(i);
            if (base == 'C' || base == 'G') {
                cgCount++;
            }
        }
        return ((double) cgCount) / dna.length();
    }

    /**
     * Finds the first occurrence of {@code stopCodon} after the start codon at
     * {@code startIndex} whose distance from {@code startIndex} is a multiple of three.
     *
     * @param dna the DNA string to search
     * @param startIndex index of the start codon
     * @param stopCodon the stop codon to look for
     * @return the index of the in-frame stop codon, or {@code dna.length()} if there is none
     */
    public int findStopCodon(String dna, int startIndex, String stopCodon) {
        int currIndex = dna.indexOf(stopCodon, startIndex + CODON_LENGTH);
        while (currIndex != -1) {
            if ((currIndex - startIndex) % CODON_LENGTH == 0) {
                return currIndex;
            }
            // Out of frame: keep looking from the very next position.
            currIndex = dna.indexOf(stopCodon, currIndex + 1);
        }
        return dna.length();
    }

    /**
     * Returns the first gene that starts at or after {@code where}: the text from the first
     * "ATG" found there up to and including the nearest in-frame stop codon (TAA, TAG or TGA).
     *
     * @param dna the DNA string to search
     * @param where index at which to start looking for the start codon
     * @return the gene including its start and stop codons, or the empty string when there is
     *         no start codon or no in-frame stop codon after it
     */
    public String findGene(String dna, int where) {
        int startIndex = dna.indexOf(START_CODON, where);
        if (startIndex == -1) {
            return "";
        }
        int stopIndexTAA = findStopCodon(dna, startIndex, "TAA");
        int stopIndexTAG = findStopCodon(dna, startIndex, "TAG");
        int stopIndexTGA = findStopCodon(dna, startIndex, "TGA");
        int minStopIndex = Math.min(Math.min(stopIndexTAA, stopIndexTAG), stopIndexTGA);
        // findStopCodon reports "not found" as dna.length().
        if (minStopIndex == dna.length()) {
            return "";
        }
        return dna.substring(startIndex, minStopIndex + CODON_LENGTH);
    }

    /**
     * Collects every gene in {@code dna}, scanning left to right. After a gene is found the
     * search resumes just past its end; a start codon with no in-frame stop codon is skipped.
     *
     * @param dna the DNA string to search
     * @return the genes found, in order of appearance
     */
    public StorageResource getAllGenes(String dna) {
        StorageResource geneList = new StorageResource();
        // Invariant: startIndex is -1 or the index of a start codon, so any gene returned by
        // findGene(dna, startIndex) begins exactly at startIndex.
        int startIndex = dna.indexOf(START_CODON);
        while (startIndex != -1) {
            String currGene = findGene(dna, startIndex);
            if (currGene.isEmpty()) {
                startIndex = dna.indexOf(START_CODON, startIndex + CODON_LENGTH);
            } else {
                geneList.add(currGene);
                startIndex = dna.indexOf(START_CODON, startIndex + currGene.length());
            }
        }
        return geneList;
    }
}