## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align="center" ) ## ----eval=FALSE--------------------------------------------------------------- # ####installation using CRAN # install.packages("HDXBoxeR") #execute only once # ## ----eval=FALSE--------------------------------------------------------------- # ####installation using Github. # #run only if devtools package is not installed on your machine # install.packages("devtools") # # library(devtools) #run next two commends only once # devtools::install_github("mkajano/HDXBoxeR") # # #Once installed, you can load the HDXBoxeR package using the following command: # library(HDXBoxeR) # ## ----------------------------------------------------------------------------- # Path to example input library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ### Later change the path to the path you your data!!! ##########Deuteration uptake vs percent deuteration file preparation #### Input for uptake deuteration. Default will use all common states, all common Deuteration Times, max. common number of replicates, no csv will be written, and peptide sequences will be matched. input_uptake_timepoint<-output_tp(filepath = file_nm) # input for percent deuteration input_deut_timepoint<-output_tp(filepath = file_nm, percent=T) ################# ####Time courses #Deuteration uptake input_uptake_timecourse<-output_tc(file_nm) # Time courses comparisons input_deut_timecourse<-output_tc(file_nm, percent=T) ################# ####Analyze selected protein.states, deuteration times, replicates. names_states<- nm_states(file_nm) ## returns names of the states in file. input_two_states_two_timepoints<-output_tp(filepath=file_nm, replicates=3, states=names_states[c(1,2)], times=c("3.00s", "72000.00s"), percent=FALSE) ###add option to match sequence and to save as csv (here not used) input_subset_states<-output_tp(filepath=file_nm, replicates=3, states=names_states[2], times=c("3.00s", "72000.00s"), percent=FALSE, seq_match=FALSE, csv="NA") ## ----------------------------------------------------------------------------- library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") input_uptake_timepoint<-output_tp(filepath = file_nm) ###average calculation for all peptides av1<-ave_timepoint(input_uptake_timepoint) head(av1) ###average differences (against first Protein State in the file) da1<-dif_ave(av1) ###standard deviation calculation for all peptides sd1<-sd_timepoint(input_uptake_timepoint) head(sd1) ### Global critical interval for 1 protein sets CI_single(s1 =sd1[,7], replicates = 3 ) ##Global critical interval for 2 protein sets CI_2pts(s1 = sd1[,7], s2=sd1[,8], replicates = 3) ##individual peptide p-value calculation against first set in the input file pv1<-pv_timepoint(input_uptake_timepoint) head(pv1) ####Analyze selected protein.states, deuteration times, replicates. names_states<- nm_states(file_nm) ## returns names of the states in file. ### choosing different state as a control for analysis input_states_reversed<-output_tp(file_nm, states=names_states[c(2,1)] ) pv2<-pv_timepoint(input_states_reversed) head(pv2) ## ----------------------------------------------------------------------------- ###load data states<-arguments_call1(filepath=file_nm) times<-arguments_call2(filepath=file_nm, states=states) replicates<-arguments_call3(filepath=file_nm, states=states, times=times) ## ----------------------------------------------------------------------------- ################# ####Time courses #Deuteration uptake input_uptake_timecourse<-output_tc(file_nm) # Time courses comparisons small_df1<-input_uptake_timepoint[,c(1:6, 10:12)] #if one wants to focus just on the few first peptides in the data it is possible to select a few rows from the input data. small_df2<-input_uptake_timepoint[1:3,] #To have more control of the selecting one can also use the select_indices functions as follows: ##for output_tp function variables that can be used are: #below one time of 60s was used for peptides that started at residue 50, ended at residue 100 and at the max 12 residues long. #not all parameters need to be used. inda<-select_indices(input_uptake_timepoint, times = c("60.00s"),start = 50, end=100, length=12) #after the indices are selected the input can be subsetted as normal small_df3<-input_uptake_timepoint[inda,] ##for the output_tc function allowed are the following parameters timecourse_output<-output_tc(file_nm) indb<-select_indices(timecourse_output, states = "bound",start = 50, end=100, length=12) head(timecourse_output[indb,1:6]) ## ----figuptake, fig.height = 3.5, fig.width = 5------------------------------- ###load data library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") input_deut_timepoint<-output_tc(file_nm, percent=TRUE) # define the timepoint using in the experiment. Here the times in seconds x<-c(3, 60,1800, 72000) uptake_plots(input_deut_timepoint[1:2,],x) ## ----fig1, fig.height = 5, fig.width = 3.5------------------------------------ ###load data # library(HDXBoxeR) # file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # input_uptake_timepoint<-output_tp(file_nm) ### Returns boxplots for all the time points and all protein states. boxplot_tp(input_uptake_timepoint, col= c("gold2", "dodgerblue")) ## ----fig2, fig.height = 5, fig.width = 4-------------------------------------- ###load data library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # ## input for deuteration uptakeinput_uptake_timepoint<-output_tp(file_nm) # # input for procent deuteration input_deut_timepoint<-output_tp(file_nm, percent=T) ## deuteration uptake for time points plots_av_tp(input_uptake_timepoint) ### average plots with colors chosen by user. plots_av_tp(input_uptake_timepoint,replicates=3, cola=c(1:10)) ### average percent deuteration plots_av_tp_proc(input_deut_timepoint) ## ----fig4, fig.height = 5, fig.width = 4-------------------------------------- # ###load data # file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # ## input for deuteration uptake # input_uptake_timepoint<-output_tp(file_nm) ## difference in uptake deuterations between two sets plots_diff_tp(input_uptake_timepoint) #Load deut data # input for procent deuteration # input_deut_timepoint<-output_tp(file_nm, percent=T) ### difference in procent deuteration plots_diff_tp_proc(input_deut_timepoint,replicates=3, cola=4) #input with different order states names_states<- nm_states(file_nm) input_reversed_states<-output_tp(file_nm, states = rev(names_states)) ### average plots for deuteration uptake where control state was chosen differently plots_diff_tp(input_reversed_states, col="darkgreen") ## ----fig5, fig.height = 5, fig.width = 4-------------------------------------- ###load data file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for deuteration uptake a<-output_tp(file_nm) ### basic volcano plot plots_vol_tp(a) # change colors for significant peptides in volcano plots plots_vol_tp(a, cola=c(2,3), replicates=3) #change pv_cutoff from 0.01 to 0.1 plots_vol_tp(a, pv_cutoff = 0.1) ## ----fig6, fig.height = 5, fig.width = 4-------------------------------------- ###load data file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for deuteration uptake a<-output_tp(file_nm) ### significantly different peptides are colored in red-blue scheme. #par(mfrow=c(4,1)) # uncomment if desired plot_peptide_sig_tp(a,replicates = 3) ### Plot where 18 peptides per row are drawn (nb_pep_row=18, default=50), #p-value&critial interval was made more stingent (0.001) # % ranges are colored were changed. #par(mfrow=c(4,1)) #uncomment if desired plot_peptide_sig_tp(a,nb_pep_row = 18, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff = 0.001) ## ----leg1, fig.height = 3, fig.width = 2.5------------------------------------ ## Legend for significant peptides plot #default ranges for figures does not require argument legend_sig_peptides() ### Using different range scheme legend_sig_peptides(ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf)) ## ----fig7, fig.height = 3, fig.width = 5-------------------------------------- ###load data library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for deuteration uptake a<-output_tp(file_nm) # input for procent deuteration b<-output_tp(file_nm, percent=T) ##Average uptake heat maps ### heat maps plot_heat_map_tp(a, replicates=3, mar_x=3) ##change some parameters plot_heat_map_tp(a, mar_x=1, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff=0.01) ### heat map for percent deuteration, require both uptake and percent deuteration data frame as input plot_heat_map_tp_proc(input_up = a, input_proc = b, replicates=3) ###Maximum uptake or percent deuteration per peptide plot_heat_map_max_uptake_tp(a, replicates=3) ### # plot_heat_map_max_uptake_tp_proc(input_up = a, input_proc = b, replicates=3, mar_x=1, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff=0.01) ## ----leg2, fig.height = 3, fig.width = 2.5------------------------------------ ## Legend for the heatmaps. #default ranges for figures does not require argument legend_heat_map(ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf)) ## ----fig_woods, fig.height = 5, fig.width = 5--------------------------------- ## file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") tp_aP<-output_tp(filepath = file_nm, percent=T) #timepoints indp<-select_indices(tp_aP, times = c("3.00s", "60.00s")) deuteration_woods_timepoints(tp_aP[indp,], replicates = 3) ## input for timecourses tc_aP<-output_tc(filepath = file_nm, percent=T) #timecourse deuteration_woods_timecourse(tc_aP, replicates=3) ## ----fig9, fig.height = 4, fig.width = 5-------------------------------------- ## file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for timecourses for procent deuteration and uptake tc_aP<-output_tc(filepath = file_nm, percent=T) ## average procent deuteration in timecourse plots_av_tcourse(tc_aP, replicates = 3) ###average heat map for timecourses plot_heat_map_tc(tc_aP, replicates=3) legend_heat_map_timecourse() ## ----fig8, fig.height = 5, fig.width = 5-------------------------------------- ###load data library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for timecourses for procent deuteration and uptake tc_a<-output_tc(filepath = file_nm) tc_aP<-output_tc(filepath = file_nm, percent=TRUE) ###All peptides drawn robot_plot_All(thP = tc_aP, th=tc_a) ###USe more stingent parameters to have less peptides drawn. CI_factor is a factor that modifies (multiplicate) Critial interval. robot_plot_All(thP = tc_aP, th=tc_a, pv_cutoff=0.005, replicates = 3) ## ----fig8a, fig.height = 4, fig.width = 5, echo=FALSE------------------------- library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for timecourses for procent deuteration and uptake tc_a<-output_tc(filepath = file_nm) tc_aP<-output_tc(filepath = file_nm, percent=T) names_states<- nm_states(file_nm) ## only two protein states should be used in functions below. ## returns dataframe with all significant peptides with pvalue=0.01, CI_factor=2 robot_indexes_df(thP = tc_aP, th=tc_a, states=rev(names_states)[1:2], pvalue = 0.005, CI_factor=1.5) #list of indexes inds<-robot_indexes(thP = tc_aP, th=tc_a, states=rev(names_states)[1:2], pvalue = 0.001, CI_factor=1) ###return indexes of peptide # Make a final robot plot. Above the plot there is bar that showing lack of coverage in the sets (grey), no coverage on plot (blue), coverage on plot (orange). robot_2states_indexes(thP = tc_aP, th=tc_a, states=rev(names_states)[1:2],indexes = inds, pvalue = 0.001, CI_factor=1, ylim=c(-120, 120), xlim=c(50, 230)) ## ----fig_woods_CI, fig.height = 5, fig.width = 5------------------------------ ###load data library(HDXBoxeR) file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") ## input for timecourses for procent deuteration and uptake tc_a<-output_tc(filepath = file_nm) tc_aP<-output_tc(filepath = file_nm, percent=TRUE) ###All peptides drawn woods_CI_plot(thP = tc_aP, th=tc_a) ###USe more stingent parameters to have less peptides drawn. CI_factor is a factor that modifies (multiplicate) Critial interval. woods_CI_plot(thP = tc_aP, th=tc_a, pv_cutoff=0.05, CI_factor = 2) ## ----echo = FALSE, fig10a, fig.height = 4, fig.width = 3, eval=FALSE---------- # file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # ## input for deuteration uptake # a<-output_tp(file_nm) # # input for procent deuteration # b<-output_tp(file_nm, percent=T) # # # ### Scripts written for significantly different peptides (uptake data). Color scheme, ranges to be colored and p-value can be changed. # #path where the output files will be written needs to be specified! # pymol_script_significant_peptide(a, path=tempdir()) # pymol_script_significant_peptide(a, ranges=c(-Inf, seq(-120, 120, by=10), Inf), pv_cutoff = 0.005,replicates = 3, order.pep=FALSE, path=tempdir()) # ###same but for procent deuteration # pymol_script_significant_peptide_proc(input_proc = b, input_up = a, path=tempdir(), ranges=c(-Inf, seq(-120, 120, by=10), Inf), pv_cutoff = 0.005,replicates = 3) # # ###scripts prepared by residue # pymol_script_significant_residue(a, ranges=c(-Inf, seq(-120, 120, by=10), Inf), path=tempdir(), pv_cutoff = 0.005,replicates = 3) # ## ----echo = FALSE------------------------------------------------------------- file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") all_summary(file_nm, Dfact=0.85,replicates = 3) ## ----echo = FALSE, eval=FALSE------------------------------------------------- # file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # # ###final output for the paper # ### USAGE output_prep(pathto_allresults.csv, output_name.csv) # output_prep(filepath = file_nm, output_name = tempfile()) # # ###long version of the uptake data, timepoint # ### to save file provide a output name to csv flag # output_tp(filepath =file_nm , csv = tempfile()) # # # ### verbose versions of the output functions create csv files with all important data averages, sd, and pvalues for uptake data # verbose_timepoint_output(filepath = file_nm, output_name =tempfile()) # verbose_timepoint_output(filepath = file_nm, output_name = tempfile(), percent=T) # ## ----eval=FALSE--------------------------------------------------------------- # # returns empty column if data is missing # path_to_folders<-system.file("extdata", package = "HDXBoxeR") # # extreme_input_gap(hm_dir =path_to_folders, replicates = 3, # timepoints =c(3, 60, 1800, 72000), output_path=tempdir()) # # if data is missing, it writes there values for undeuterated # extreme_input_undeut(hm_dir =path_to_folders, replicates = 2, # timepoints =c(3, 60, 1800, 72000), output_path=tempdir()) # ## ----eval=FALSE--------------------------------------------------------------- # # library(HDXBoxeR) # file_nm<-system.file("extdata", "All_results_table.csv", package = "HDXBoxeR") # # ############################ # ## Load data # ########################### # # ##########Deuteration uptake vs procent deuteration file preparation # #### Input for uptake deuteration # a<-output_tp(file_nm) # # input for percent deuteration # b<-output_tp(file_nm, percent=T) # # # Time courses inputs # tc_a<-output_tc(filepath = file_nm) # tc_aP<-output_tc(filepath = file_nm, percent=T) # # # ########################### # ##definition of some important variables # ################### # # states<-arguments_call1(filepath=file_nm) # times<-arguments_call2(filepath=file_nm, states=states) # replicates<-arguments_call3(filepath=file_nm, states=states, times=times) # # ###################################### # ## Statistical analysis # ##################################### # # ###average calculation for all peptides # av1<-ave_timepoint(a) # ###average differences (against first Protein State in the file) # da1<-dif_ave(av1) # ###standard deviation calculation for all peptides # sd1<-sd_timepoint(a) # ### Global critical interval for 1 protein sets # CI_single(s1 =sd1[,7], replicates = 3 ) # ##Global critical interval for 2 protein sets # CI_2pts(s1 = sd1[,7], s2=sd1[,8], replicates = 3) # ##individual peptide p-value calculation against first set in the input file # pv1<-pv_timepoint(a) # # ## Plots # # ##uptake plots # # # define the timepoint using in the experiment. Here the times in seconds # x<-c(4, 60,1800, 72000) # #par(mfrow=c(1,2)) # uptake_plots(tc_aP[1:2,],x) # # # # Boxplots # boxplot_tp(a, col= c("gold2", "dodgerblue")) # # # Volcano plots # plots_vol_tp(a, cola=c(2,3), replicates=3, pv_cutoff = 0.01) # # # Significantly different peptides plot across the sequence # #par(mfrow=c(4,1)) # uncomment to have 4 panels # plot_peptide_sig_tp(a,nb_pep_row = 18, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff = 0.01) # # ##Average uptake heat maps # ### heat maps # plot_heat_map_tp(a, mar_x=8, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff=0.01) # # ### heat map for procent deuteration, require both uptake and procent deuteration data frame as input # plot_heat_map_tp_proc(input_up = a, input_proc = b, replicates=3) # # ###Maximum uptake or procent deuteration per peptide # plot_heat_map_max_uptake_tp(a, replicates=3) # ### # plot_heat_map_max_uptake_tp(input_up = a, input_proc = b, replicates=3, mar_x=8, ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf), pv_cutoff=0.01) # # ## Legend for the heatmaps. # #default ranges for figures does not require argument # legend_heat_map(ranges = c(-Inf, -10, -2.5,0,2.5, 10, Inf)) # # ###################### # ###Timecourses plots # ###################### # # ## average procent deuteration in timecourse # plots_av_tcourse(tc_aP, replicates = 3) # ###average heat map for timecourses # plot_heat_map_tc(tc_aP, replicates=3) # legend_heat_map_timecourse() # # ###Robot plots for the timecourses # robot_plot_All(thP = tc_aP, th=tc_a, pv_cutoff=0.01, CI_factor = 1) # # ##############Woods plots # ##timecourse and timepoint woods plots # ################ # # indp<-select_indices(b, times = c("3.00s", "60.00s")) # deuteration_woods_timepoints(b[indp,], replicates = 3) # # ## input for timecourses # deuteration_woods_timecourse(tc_aP, replicates=3) # # ###woods plots # woods_CI_plot(thP = tc_aP, th=tc_a) # ###USe more stingent parameters to have less peptides drawn. CI_factor is a factor that modifies (multiplicate) Critial interval. # woods_CI_plot(thP = tc_aP, th=tc_a, pv_cutoff=0.005, CI_factor = 5) # # # ####################### # ##Pymol scripts # ####################### # # pymol_script_significant_peptide(a, path=tempdir(), ranges=c(-Inf, seq(-120, 120, by=10), Inf), pv_cutoff = 0.01,replicates = 3) # ###same but for percent deuteration # pymol_script_significant_peptide_proc(input_proc = b, input_up = a,path=tempdir(), ranges=c(-Inf, seq(-120, 120, by=10), Inf), pv_cutoff = 0.01,replicates = 3, order.pep=FALSE) # # ###scripts prepared by residue # pymol_script_significant_residue(a, path=tempdir(), ranges=c(-Inf, seq(-120, 120, by=10), Inf), pv_cutoff = 0.01,replicates = 3) # # ################## # ##Summary # ################## # all_summary(file_nm, Dfact=0.85,replicates = 3) # # ############### # ##Outputs # ############### # # ###final output for the paper # ### USAGE output_prep(pathto_allresults.csv, output_name.csv) # output_prep(filepath = file_nm, output_name = tempfile()) # # ###long version of the uptake data, timepoint # ### USAGE output_tp_csv(pathto_allresults.csv, output_name.csv) # output_tp(filepath =file_nm , csv = tempfile()) # # ### verbose versions of the output functions create csv files with all important data averages, sd, and pvalues for uptake data # verbose_timepoint_output(filepath = file_nm, output_name = tempfile(), percent=TRUE) # # ########################### # ##Extreme input preparation # ########################### # # # returns empty column if data is missing # extreme_input_gap(hm_dir ="filepath", replicates = 3, # timepoints =c(3, 60, 1800, 72000), output_path=tempdir()) # # if data is missing, it writes there values for undeuterated # extreme_input_undeut(hm_dir ="filepath", replicates = 2, # timepoints =c(3, 60, 1800, 72000), output_path=tempdir()) # # ## ----------------------------------------------------------------------------- ##All functions with arguments lsf.str("package:HDXBoxeR")