"""Assignment template: BED/GFF conversion, FASTA lengths, float formatting.

Each question section starts with a marker ``print`` that must stay in
place, defines the input path or values for that question, and ends with a
placeholder comment showing where the solution code goes.
"""
import re

## do not remove the extra print statements
print('Question 1')
#################################################################
# QUESTION 1
#
# Converting a BED file to a GFF file. You are provided a path
# (below) to a file that the autograder has access to. Please read
# in the file, then convert it to a GFF file. Here are some example
# lines:
#
#   2R  22136967  22172834  a        .  +
#   2R  24378629  24390294  mAChR-A  .  -
#   2R  6013909   6015689   Act42A   .  -
#
# As with most BED files, this is a tab-delimited file.
#
# Use the name in the name column of the BED file as an ID in the
# GFF attribute column, but put it in quotes, so that when the name
# in the BED file is a, the attribute looks like:
#
#   gene_symbol "a"
#
# Convert the positions from the BED file's 0-based, end-exclusive
# coordinates (like Python slices) to the GFF's one-based inclusive
# coordinates. For example, BED start 22136967 becomes GFF start
# 22136968, while the end 22172834 stays the same.
#
# For the source, use FlyBase (just the name, without quotes).
# For the frame, you can just use a dot ".".
# For the seqtype/feature type, please use gene (without quotes).
# You can use the score from the BED as-is, even if it is a dot ".",
# and print the same thing for the GFF.
#
# Please print this as a tab-delimited file to an output GFF file
# named: flybase_assignment8.gff
#################################################################

bed_file = '/autograder/source/flybase_example.bed'

##add code for question 1 here


## do not remove the extra print statements
print('Question 2')
#################################################################
# QUESTION 2
#
# Converting a GFF file to a BED file. You are provided a path
# (below) to a file that the autograder has access to. Please read
# in the file, then convert it to a BED file. Here are some example
# lines from the GFF:
#
#   2R  FlyBase  gene  22136968  22172834  .  +  gene_id "FBgn0000008"; gene_symbol "a";
#   2R  FlyBase  gene  24378630  24390294  .  -  gene_id "FBgn0000037"; gene_symbol "mAChR-A";
#   2R  FlyBase  gene  6013910   6015689   .  -  gene_id "FBgn0000043"; gene_symbol "Act42A";
#
# Use the gene_symbol term in the attribute column of the GFF file
# as the name in the name column of the BED file. The attribute
# column of the GFF looks like this:
#
#   gene_id "FBgn0000008"; gene_symbol "a";
#
# so you need to extract the gene_symbol value, remove the quotes,
# and use the result as the name in the BED file.
#
# Convert the positions from the GFF's one-based inclusive
# coordinates to the BED file's 0-based, end-exclusive coordinates
# (like Python slices). For example, GFF start 22136968 becomes BED
# start 22136967, while the end 22172834 stays the same.
#
# If the score is a dot ".", you can keep it a dot ".".
#
# Please print this as a tab-delimited file to an output BED file
# named: flybase_assignment8.bed
#################################################################

gff_file = '/autograder/source/flybase_example.gff'

##add code for question 2 here


## do not remove the extra print statements
print('Question 3')
#################################################################
# QUESTION 3
#
# Read in the FASTA file at the path given below. Store the
# contents in a dictionary, with the defline as the key and the
# sequence as the value. Then print a tab-delimited file with two
# columns, named "seq_lengths.txt". The first column should be the
# defline, and the second column should be the length of the
# sequence. In addition, the rows should be sorted by sequence
# length in descending order, with the longest chromosome lengths
# printed first.
#################################################################

genome_fasta = '/autograder/source/example.fasta'

##add code for question 3 here


## do not remove the extra print statements
print('Question 4')
#################################################################
# QUESTION 4
#
# For the following variables, use an f-string to print them out
# to 3 decimal places. Write the output so that each variable is
# printed as the variable name, followed immediately by an equal
# sign, and then the value (to three digits after the decimal
# point), e.g.:
#
#   pi=3.142
#
# and print tabs between the variables' name/value pairs. The
# result should be just one line of text, with the three variables
# and their values separated by tabs. Print it out directly, not to
# a file.
#################################################################

pi = 3.14159265358979
e = 2.718281828459045
phi = 1.618033988749895

##add code for question 4 here


## do not remove the extra print statements
print('Question 5')
#####################################################################
# QUESTION 5
#
# Do the same thing as in Question 4, but using interpolated
# strings. Meaning, print out the same variable names and their
# values separated by tabs. Let's do this one to four decimal places.
#
# For the following variables, use interpolated strings to print them
# out to 4 decimal places. Write the output so that each variable is
# printed as the variable name, followed immediately by an equal
# sign, and then the value (to four digits after the decimal point),
# e.g.:
#
#   pi=3.1416
#
# and print tabs between the variables' name/value pairs. The result
# should be just one line of text, with the three variables and their
# values separated by tabs. Print it out directly, not to a file.
#####################################################################

pi = 3.14159265358979
e = 2.718281828459045
phi = 1.618033988749895

##add code for question 5 here