## do not remove the extra print statements
print('Question 1')
#################################################################
#                         QUESTION 1                            #
#                                                               #
# In lecture, we saw how to write code to read a genome FASTA file.
# The autograder will contain a genome fasta file in the path provided
# below.
#
# As with most FASTA files, there will be two types of lines.
# One has the ">" sign and is the definition line (defline) of each
# chromosome. The other lines constitute genomic sequences.
#
# Write a script to read through the file to collect deflines and
# remove the ">" sign before those deflines. You should also
# define a new list before you read the file.
#
# Your code should append each defline to the list if the length
# of the defline is less than 3 after the ">" is removed.
#
# Then, after reading through the file, use the join() method
# to join all deflines in the list into a string, using the comma
# (,) character as a separator.
#
# Last, print the string you created, which should be a comma-
# separated collection of chromosome names on just one line of text.
#
#################################################################
genome_fasta = '/autograder/source/dm6.fa'

##add code for question 1 here


## do not remove the extra print statements
print('Question 2')
#################################################################
#                         QUESTION 2                            #
#                                                               #
#
# First, for the list of DNA sequences below, combine them into
# one sequence by concatenation (hint: you should be able to do
# that in one step). Print out the sequence on one line.
#
# Second, calculate the GC-content for the concatenated sequence
# you just printed and print the GC-content as a percentage with
# two decimal places (not two sig figs, just two digits after the
# decimal/dot (.)). Print this out on one line just as a number,
# no extra headers and no percent symbol.
# This answer should have two lines of text.
#
#################################################################
DNA = ['GCTGATCGTACTACG','GCTACGTACTACTAC','GCGCGTACGCTAC','GCATCTGACGCGCGTAACG']

##add code for question 2 here


## do not remove the extra print statements
print('Question 3')
#################################################################
#                         QUESTION 3                            #
#                                                               #
#
# Given the following string, write code that will do the
# following tasks:
#
# 1. split the string on the newline character ('\n') and assign
#    the resulting elements to a list.
#
# 2. join the elements of the list together using join() and using
#    the empty string as a delimiter.
#
# 3. print out the resulting string as one line of text.
#
#################################################################
sequences = "ATCGTAGCT\nAATCAGCTCGCTA\nTCTCAGAGATT"

##add code for question 3 here


## do not remove the extra print statements
print('Question 4')
#################################################################
#                         QUESTION 4                            #
#                                                               #
#
# For the genome fasta mentioned above in Question 1, write some
# code to read through the file, and store the genomic
# sequences into a dictionary of lists with the chromosome name
# as the key and a list of strings as the value. Next use the
# join method to create a dictionary of strings, also with the
# chromosome name as the key, but the full genome sequence as
# the value. This was basically done in an exploration page.
# Lastly, print the length of chromosome "2L".
# Your answer below should just be the length of chromosome 2L,
# a number on one line.
#
#################################################################
genome_fasta = '/autograder/source/dm6.fa'

##add code for question 4 here


## do not remove the extra print statements
print('Question 5')
#####################################################################
#                           QUESTION 5                              #
#                                                                   #
#
# There is a bed file with the autograder in the path defined below;
# please read in the bed file using the methods described in class.
# Store the columns of the file as a list of tuples. You must only
# store the chromosome, start, and end positions (columns 1, 2, and 3)
# in that order, and store the start and end as int data types, and
# the chromosome name as a string. Then print out the list of tuples
# just as a list, like:
#     print(L)
# which should output it on one line (as opposed to looping
# through the tuples and printing out each tuple, which you should not
# do here).
#####################################################################
bed_file = '/autograder/source/example.bed'

##add code for question 5 here