Commit be654783 authored by Rebecca E Batorsky
Browse files

modify insertion script to process complex, try 2. Add 0 to left or right...

modify insertion script to process complex, try 2. Add 0 to left or right boundary if no matching sequence is found
parent 03ec23c4
......@@ -15,7 +15,7 @@ suppressPackageStartupMessages(library(Biostrings))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(dplyr))
##test if there is at least one argument: if not, return an error
#test if there is at least one argument: if not, return an error
if (length(args)<5) {
print(length(args))
stop("Usage: Rscript INSERTION_PROGRAM.R hifibr_reclassified.csv insertion_file.txt nick_location outdir", call.=FALSE)
......@@ -25,10 +25,15 @@ if (length(args)<5) {
out_dir = args[3]
nick = as.integer(args[4])
search_radius=as.integer(args[5])
}
#a_initial<-read.csv(insertion_in)
# hifi_in = "~/Documents/git/sdmmej/test_data/polyA1Seq/PolyA1Seq_testdata_output/PolyA1Seq_testdata_reclassified.csv"
# insertion_in = "~/Documents/git/sdmmej/test_data/polyA1Seq/PolyA1Seq_testdata_output/PolyA1Seq_testdata_complex.txt"
# out_dir = "~/Documents/git/sdmmej/test_data/polyA1Seq/PolyA1Seq_testdata_output/"
# nick = 161
# search_radius=30
# Load the table of sequences to analyse; its first column is iterated
# further down to locate the left/right deletion boundaries.
a <- read.csv(insertion_in)
## get reference
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
hifibr_input <- read.csv(hifi_in, header = TRUE)
......@@ -74,6 +79,7 @@ k1 <- 30 # how far you want to cut back to search, this needs to to be adjusted
# Right-hand counterpart of k1; used below to bound the start positions of
# the right-side candidate strings.
k2 <- 30
# sL1: candidate left-flank strings. Each element is a prefix of L that starts
# at position 1 and ends at one of the positions (l-k1-1), ..., l, so the
# vector runs from the shortest prefix to the longest.
# NOTE(review): l is presumably nchar(L) -- confirm where it is defined.
sL1 <- substring(L, 1, (l-k1-1):l)
print(sL1)
# sR1: candidate right-flank strings. Each element is a substring of R that
# ends at position r and starts at one of the positions 1, ..., (r-k2-1), so
# the vector runs from the longest candidate to the shortest.
# NOTE(review): r is presumably nchar(R) -- confirm where it is defined.
sR1 <- substring(R, 1:(r-k2-1),r)
# Left deletion boundary for every sequence in the first column of `a`.
# a2 is filled by name (the sequence string itself is the key) and holds the
# end position of the last matching left-flank candidate, or 0 when no
# candidate from sL1 occurs in the sequence.
a2 <- NULL
for (i in a[, 1]) {
  # str_locate() is vectorised over the patterns in sL1: one start/end row per
  # candidate, NA where the candidate is absent. na.omit() keeps the hits only.
  lb <- str_locate(as.character(i), sL1)
  lb <- na.omit(lb)
  if (nrow(lb) == 0) {
    # Nothing matched: record 0 instead of assigning a zero-length value,
    # which would abort with "replacement has length zero".
    print(paste0("No match found left of boundary for sequenc ", i))
    a2[i] <- 0
  } else {
    # Last surviving row = last matching candidate in sL1; column 2 = its end.
    a2[i] <- lb[nrow(lb), 2]
  }
}
# Right deletion boundary for every sequence in the first column of `a`.
# a3 is filled by name (the sequence string itself is the key) and holds the
# start position of the first matching right-flank candidate, or 0 when no
# candidate from sR1 occurs in the sequence.
a3 <- NULL
for (i in a[, 1]) {
  # One start/end row per candidate in sR1; rows for absent candidates are NA
  # and are dropped by na.omit().
  rb <- str_locate(as.character(i), sR1)
  rb <- na.omit(rb)
  if (nrow(rb) == 0) {
    # Test the row count BEFORE indexing: rb[1, 1] on a zero-row matrix raises
    # "subscript out of bounds", so a length check on the extracted value can
    # never be reached.
    print(paste0("No match found right of boundary for sequenc ", i))
    a3[i] <- 0
  } else {
    # First surviving row = first matching candidate in sR1; column 1 = start.
    a3[i] <- rb[1, 1]
  }
}
## this is necessary to run the complex sequences, but there is an error somewhere
# a2=NULL # create empty vector to insert left del boundary
# a3=NULL # create empty vector to insert RIGHT del boundary
#
# ## this loop checks to see if sequences meet the criteria
# keep_seq=c()
# for (n in 1:nrow(a_initial)){
# i=a_initial[n, 1]
# lbb_found=0
# rbb_found=0
#
# lb <- str_locate(as.character(i), sL1)
# lb <- na.omit(lb)
# lbb <- lb[nrow(lb),2]
#
# if (length(lbb) > 0){
# lbb_found=1
# }
#
# rb <- str_locate(as.character(i), sR1)
# rb <- na.omit(rb)
# rbb <- rb[1,1]
#
# if (length(rbb)>0){
# rbb_found=1
# }
# if ((lbb_found & rbb_found)){
# keep_seq=c(keep_seq,i)
# }else{
# print("skipping seq that doesn't match any L or R")
# print(i)
# }
# }
# a=data.frame("RECONSTRUCTED_SEQ" = keep_seq)
# for (n in 1:nrow(a)){
#
# i=a[n, 1]
#
# lb <- str_locate(as.character(i), sL1)
# lb <- na.omit(lb)
# lbb <- lb[nrow(lb),2]
#
# rb <- str_locate(as.character(i), sR1)
# rb <- na.omit(rb)
# rbb <- rb[1,1]
#
# a2[i] = lbb
# a3[i] = rbb
# }
# Put the left and right deletion boundaries side by side, then attach them
# to the sequence table and give the columns readable names.
a4 <- cbind(as.data.frame(a2), as.data.frame(a3))
a5 <- setNames(cbind(a, a4), c("RECONSTRUCTED_SEQ", "left_del", "right_del"))

# Sequence with the insertion cut out: everything up to and including
# left_del, followed by everything from right_del to the end.
del_seq <- with(a5, paste0(
  substring(RECONSTRUCTED_SEQ, first = 1, last = left_del),
  substring(RECONSTRUCTED_SEQ, first = right_del,
            last = nchar(as.character(RECONSTRUCTED_SEQ)))
))
a5$del_seq <- del_seq

# Inserted sequence: the bases strictly between the two boundaries.
ins <- with(a5, substring(RECONSTRUCTED_SEQ,
                          first = left_del + 1, last = right_del - 1))
a5$insertion <- ins
......@@ -472,3 +440,4 @@ output3 <- paste0(out_dir, "/", plasmid, "_",type,"_insertion_alignment2.csv")
# Write the three result tables to their output paths.
write.csv(x = master2, file = output1)
write.csv(x = test2, file = output2)
write.csv(x = pretty, file = output3)
......@@ -39,8 +39,8 @@ echo "Done Hifiber processing"
echo "------"
echo "Starting deletion consistency script, see log ${bn}_deletion.log"
# Run from the repository root: no `cd deletion/`, so the working directory
# stays unchanged for the steps that follow and the script path is given
# relative to the root. The tool is invoked exactly once.
python deletion/SDMMEJDeletionProgram_cli.py -hi ${hifi_reclass} -del ${deletion_out} -n $breakpoint -out $results_dir > ${results_dir}/${bn}_deletion.log
echo "------"
echo "Done deletion script"
......@@ -49,8 +49,8 @@ echo "------"
echo "Starting insertion consistency script on insertions"
echo "------"
# Run from the repository root: no `cd ../insertion/`; the R script is
# addressed by its root-relative path and invoked exactly once.
Rscript insertion/INSERTION_PROGRAM.R ${hifi_reclass} ${insertion_out} $results_dir $breakpoint $search_radius
echo "------"
echo "Done insertion script on insertions"
......@@ -60,8 +60,8 @@ echo "------"
echo "Starting insertion consistency script on complex"
echo "------"
# Run from the repository root: no `cd ../insertion/`; the R script is
# addressed by its root-relative path and invoked exactly once, this time on
# the complex-event file.
Rscript insertion/INSERTION_PROGRAM.R ${hifi_reclass} ${complex_out} $results_dir $breakpoint $search_radius
echo "------"
echo "Done insertion script on complex"
......
RECONSTRUCTED_SEQ
GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC
GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC
"","ID","insertion_alignment","mechanism"
"1","PolyA1Seq_testdata-1","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","seq"
"2","PolyA1Seq_testdata-1","--------------------------------------------------------------------------------------------------------------------------------------------------------------------CCAA----------------------------------------------------------------------------------------------------------------------------------------------------------","Loop-out"
"3","PolyA1Seq_testdata-1","0","Snap-back"
"4","PolyA1Seq_testdata-2","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","seq"
"5","PolyA1Seq_testdata-2","0","Loop-out"
"6","PolyA1Seq_testdata-2","0","Snap-back"
"","ID","DR_START","DR_END","RC_START","RC_END","consistency","RECONSTRUCTED_SEQ","left_del","right_del","del_seq","insertion","plasmid","DRmotif_length","RCmotif_length","Loop-out","Snap-back"
"1","PolyA1Seq_testdata-1",164,167,NA,143,"TRUE","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC",160,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","A","PolyA1Seq_testdata",4,3,"--------------------------------------------------------------------------------------------------------------------------------------------------------------------CCAA----------------------------------------------------------------------------------------------------------------------------------------------------------","0"
"2","PolyA1Seq_testdata-2",NA,NA,NA,NA,"FALSE","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC",0,163,"TAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"0","0"
"","ID","DR_START","DR_END","RC_START","RC_END","consistency","left_del","right_del","del_seq","insertion","plasmid","DRmotif_length","RCmotif_length","mechanism","insertion_alignment","unicorn"
"1","PolyA1Seq_testdata-1",164,167,NA,143,"TRUE",160,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","A","PolyA1Seq_testdata",4,3,"seq","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","PolyA1Seq_testdata-1-seq-GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC"
"3","PolyA1Seq_testdata-1",164,167,NA,143,"TRUE",160,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","A","PolyA1Seq_testdata",4,3,"Loop-out","--------------------------------------------------------------------------------------------------------------------------------------------------------------------CCAA----------------------------------------------------------------------------------------------------------------------------------------------------------","PolyA1Seq_testdata-1-Loop-out---------------------------------------------------------------------------------------------------------------------------------------------------------------------CCAA----------------------------------------------------------------------------------------------------------------------------------------------------------"
"5","PolyA1Seq_testdata-1",164,167,NA,143,"TRUE",160,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","A","PolyA1Seq_testdata",4,3,"Snap-back","0","PolyA1Seq_testdata-1-Snap-back-0"
"2","PolyA1Seq_testdata-2",NA,NA,NA,NA,"FALSE",0,163,"TAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"seq","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","PolyA1Seq_testdata-2-seq-GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC"
"4","PolyA1Seq_testdata-2",NA,NA,NA,NA,"FALSE",0,163,"TAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"Loop-out","0","PolyA1Seq_testdata-2-Loop-out-0"
"6","PolyA1Seq_testdata-2",NA,NA,NA,NA,"FALSE",0,163,"TAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"Snap-back","0","PolyA1Seq_testdata-2-Snap-back-0"
GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC---AGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC
GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTA--CCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC
Sample ID Deletion Length Repair Type Mechanism Motif to Break Motif to Deletion P1 to Break P1 to Deletion P2 to Break P2 to Deletion P1 to P2 Motif Length Break Side Deletion to MH Motif Sequence
1 3 ABJ snap-back 11 11 8 8 1 1 7 4 left -3 CCTG/CAGG
1 3 ABJ loop-out 17 14 17 14 8 5 9 6 right 0 CAGGCC/CAGGCC
2 2 ABJ loop-out 27 23 27 23 5 1 22 4 left 2 ACCT/ACCT
2 2 ABJ loop-out 15 11 15 11 7 3 8 5 left 2 TTACC/TTACC
RECONSTRUCTED_SEQ
GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC
"","ID","insertion_alignment","mechanism"
"1","PolyA1Seq_testdata-1","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","seq"
"2","PolyA1Seq_testdata-1","0","Loop-out"
"3","PolyA1Seq_testdata-1","0","Snap-back"
"","ID","DR_START","DR_END","RC_START","RC_END","consistency","RECONSTRUCTED_SEQ","left_del","right_del","del_seq","insertion","plasmid","DRmotif_length","RCmotif_length","Loop-out","Snap-back"
"1","PolyA1Seq_testdata-1",NA,NA,NA,NA,"FALSE","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC",150,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","NCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"0","0"
"","ID","DR_START","DR_END","RC_START","RC_END","consistency","left_del","right_del","del_seq","insertion","plasmid","DRmotif_length","RCmotif_length","mechanism","insertion_alignment","unicorn"
"1","PolyA1Seq_testdata-1",NA,NA,NA,NA,"FALSE",150,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","NCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"seq","GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","PolyA1Seq_testdata-1-seq-GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC"
"2","PolyA1Seq_testdata-1",NA,NA,NA,NA,"FALSE",150,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","NCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"Loop-out","0","PolyA1Seq_testdata-1-Loop-out-0"
"3","PolyA1Seq_testdata-1",NA,NA,NA,NA,"FALSE",150,162,"GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC","NCTGTTATCCC","PolyA1Seq_testdata",NA,NA,"Snap-back","0","PolyA1Seq_testdata-1-Snap-back-0"
ID,UMI,CIGAR_STRING,READ_LENGTH,SPLIT_CIGAR_STRING,MATCH_LEFT,MATCH_RIGHT,DISTANCE_FROM_BREAK_LEFT,DISTANCE_FROM_BREAK_RIGHT,DELETION_FROM_LEFT,DELETION_FROM_RIGHT,TOTAL_DELETION,INSERTION_START,INSERTION_END,INSERTION_LENGTH,INSERTED_SEQ,CLASS,ALIGNED_SEQ,READS,MICROHOMOLOGY,MH_Length,NUMBER_OF_ALIGNMENTS,MISMATCH_PERCENTAGE_TO_RECONSTRUCTED,CLASS_final
1,AATTGGAAAA_CTCATGTCCT,325M,325,325M,325,325,164,161,0,0,0,161,164,0,,exact,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,214208,NA,NA,1,1.847269943,exact
2,AATTGGAAAA_CTCATGTCCT,325M,325,325M,150,164,-11,0,-11,0,-11,150,164,11,NCTGTTATCCC,complex,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACNCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,24,NA,NA,1,8.333333333,insertion
3,AATTGGAAAA_CTCATGTCCT,160M3D162M,322,160M3D162M,160,161,-1,-3,-1,-3,-4,160,161,1,A,complex,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCAAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,40,NA,NA,1,2.5,complex
4,AATTGGAAAA_CTCATGTCCT,117M1I208M,326,117M1I208M,121,208,-40,44,-40,0,-40,121,164,41,ATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCC,complex,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,15,NA,NA,1,0,complex
5,AATTGGAAAA_CTCATGTCCT,157M2D166M,323,157M2D166M,157,166,-4,2,-4,0,-4,157,164,2,CC,complex,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTACCTAGAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,10,NA,NA,1,0,deletion
6,AATTGGAAAA_CTCATGTCCT,161M3D161M,322,161M3D161M,161,161,0,-3,0,-3,-3,161,161,0,,deletion,GATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGATCCTAGGAGGGAAAAAATTCGTACTTTGGAGTACGAAAATTGGAAAATAGAGCAGCACTCACCTTATTGTCATTACCCTGTTATCCCAGGCCAAACAGGCCGGCGCCTCCTAACGATCCTCTAGCTCATGTCCTGAACGTTAACGTTAACGTAACGTTAACTCGAGGCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACCCCAGGACC,10,NA,0,1,0,deletion
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment