## do not remove the extra print statements
print('Question 1')
#################################################################
# QUESTION 1
#
# What is the most frequently occurring 6-mer in the fruit fly
# genome? Below is a path to the Drosophila melanogaster genome.
# This is the same file we used in Week 7. Go through the entire
# genome and count each k-mer. Then print out the most frequently
# occurring k-mer. You should use overlapping k-mers because we want
# to consider all of them! Your answer should just contain one line
# that contains only 6 letters, the nucleotides of the most
# frequent 6-mer.
#
#################################################################

# Path to the genome FASTA file that Question 1 reads.
genome_fasta = '/autograder/source/dm6.fa'

##add code for question 1 here


## do not remove the extra print statements
print('Question 2')
#################################################################
# QUESTION 2
#
# For the provided sequences, concatenate them into one sequence
# with no delimiter. Then search through and find the 5th
# non-overlapping 7-mer in the concatenated sequence. This should
# be the 5th in order of the sequence, not fifth most abundant.
# The first 7-mer starting at position 0 will be number 1, and so on.
# Your answer should just be one 7-mer on one line, the 5th
# non-overlapping 7-mer.
#
#################################################################

# Sequences to concatenate (in list order) for Question 2.
DNA = ['GCTGATCGTACTACG', 'GCTACGTACTACTAC', 'GCGCGTACGCTAC', 'GCATCTGACGCGCGTAACG']

##add code for question 2 here


## do not remove the extra print statements
print('Question 3')
#################################################################
# QUESTION 3
#
# Given the following string, write code that will do the
# following tasks:
#
# 1. split the string on the newline character ('\n') and assign
#    the resulting elements to a list.
#
# 2. join the elements of the list together using join() and using
#    the empty string as a delimiter.
#
# 3. print out a list of all 2-mers (dinucleotides) in alphabetical
#    order. You should not print duplicates. Each distinct k-mer in the
#    resulting sequence should be printed only once. Don't print the counts,
#    just a list of dinucleotides that occur in the concatenated sequence
#    in alphabetical order, one dinucleotide per line. Don't print
#    additional content beyond that.
#
#################################################################

# Newline-delimited input string for Question 3.
sequences = "ATCGTAGCT\nAATCAGCTCGCTA\nTCTCAGAGATT"

##add code for question 3 here


## do not remove the extra print statements
print('Question 4')
#################################################################
# QUESTION 4
#
# Let's say you have a sequence of length L = 123,456,789 bp.
# How many non-overlapping k-mers of length k=27bp would there be?
# Print the numerical value of the number of non-overlapping k-mers
# only. Your answer should be just the number.
#
#################################################################

# Sequence length in base pairs for Question 4.
L = 123456789

##add code for question 4 here


## do not remove the extra print statements
print('Question 5')
#####################################################################
# QUESTION 5
#
# Let's say you have a sequence of length L = 123,456,789 bp.
# How many overlapping k-mers of length k=27bp would there be?
# Print the numerical value of the number of overlapping k-mers
# only. Your answer should be just the number.
#
#####################################################################

# Sequence length in base pairs for Question 5 (same value as Question 4).
L = 123456789

##add code for question 5 here