Commit 51e7e1db authored by Trever Smith's avatar Trever Smith
Browse files

Data and scripts

All data and scripts for generating figures in the paper.
parent a2f074ea
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MorphEUS classification trials %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Script to perform classification trials and MorphEUS classifications
%%% Description:
% prompts user if doing joint or individual profile, asks for other
% settings, asks user to select drugs to include in calculations, asks user
% to select drug(s) to apply, then performs MorphEUS classification trials
% saves result at end of maxrun number runs
% loads joint profile, takes 80 untreated from each 025x and 3x workspace,
% runs backwards select, picks out the best result (decided by highest
% percent success, if there is a tie, then with the lowest variable number)
% records results from what was pointing to what, then repeat the process
% from the top with a new random set of untreated
% will save result in bayes and a log of all outputs in logs
%% clear workspace and set up paths
clear
add_all_paths
rng('default')
% turn off TeX interpreter
set(groot, 'defaultAxesTickLabelInterpreter', 'none')
% get user input for name of variable to save at end
prompt = {'Enter a name for the variable information to be stored'};
dlgtitle = 'Save bayes_struct as:';
definput = {'bayes_struct'};
user_save_title = inputdlg(prompt,dlgtitle,[1 60],definput);
%% easy way to get and change the workspaces
%%% updated so that you only need joint workspace name and the other two
%%% will automatically take and separate the workspace approproiately
%x3_workspace_name = "full_3x_workspace_with_stonybrook";
% if you don't have most updated for 3x, will default to full_3x_with_redos_no_A22_drug_name_only
%x025_workspace_name = "full_025x_workspace_with_stonybrook";
% if you don't have most updated for 025x, will default to full_025x_with_redos_v3_drug_names_only
%%%% BASE WORKSPACES %%%%
%%%% workspaces must be located in subfolder workspaces
% requried spaces -- main data workspace
morpheus_workspace = "morpheus_workspace_03232020.mat";
% second morpheus workspace
morpheus2_workspace = "morpheus2_baseworkspace_11132020.mat";
% third morphues workspace
morpheus3_workspace = "morpheus3_workspace_img_01062021.mat";
% morphEUS3 standard workspace
morpheus3_standard_workspace = "morphEUS3_standard_basespace_05142021.mat";
base_workspaces = [morpheus_workspace morpheus2_workspace morpheus3_workspace morpheus3_standard_workspace];
% allow user to select base workspace
[indexes_chosen, any_chosen_tf] = listdlg('ListString',base_workspaces,...
'Name',"Select base workspace",'ListSize',[220 300],'PromptString',"base_workspaces");
joint_workspace_name = base_workspaces(indexes_chosen);
% all applied workspaces
condition_workspace_name = "all_condition_data_03232020.mat";
timecourse_workspace_name = "timecourse_data_03232020.mat";
clinical_workspace_name = "clinical_data_formatted_03232020.mat";
stonybrook2h_workspace_name = "stonybrook_2h_03232020.mat";
stonybrook24h_workspace_name = "stonybrook_24h_03232020.mat";
applied_table_low_workspace_name = "low_dose_applied_07212020.mat";
applied_table_high_workspace_name = "high_dose_applied_07212020.mat";
batch_workspace_name = "Batch_experiment_workspace_10132020.mat";
newdrug_workspace_name = "newdrug_experiment_workspace_img_11192020.mat";
morpheus2_workspace_name = "morpheus2_11052020.mat";
morpheus3_standard_workspace_name = "morpheus3_standard_applied_05142021.mat";
applied_workspace_list = {timecourse_workspace_name, ...
clinical_workspace_name,stonybrook24h_workspace_name,stonybrook2h_workspace_name,...
condition_workspace_name,applied_table_low_workspace_name,applied_table_high_workspace_name,...
batch_workspace_name,newdrug_workspace_name,morpheus2_workspace_name,morpheus3_standard_workspace_name};
all_workspaces = {joint_workspace_name, timecourse_workspace_name, ...
clinical_workspace_name,stonybrook24h_workspace_name,stonybrook2h_workspace_name,...
condition_workspace_name,batch_workspace_name,newdrug_workspace_name,morpheus2_workspace_name,morpheus3_standard_workspace_name};
%% create a log
% get time string for a unique log name/variable name at the end
time_string = strrep(datestr(clock),":","-");
logname = strcat("./logs/bayes log ",user_save_title, time_string,".txt");
diary(logname)
% for clarity of reading diary
warning off
%% some other settings
with_randomized_labels = false; %Different than random_comapre (just for this script)
% we are doing backwards select, so do feature reduction is true
do_feature_reduction = true;
% set to on to create, off to not create (will be invisible with off)
create_cm = 'off';
show_pca = false;
% settings for knn_helper
tit_add = "";
knn_type = 'median';
knn_with_apply = false;
% yes we are doing bayesian stuff
bayesian = true;
% for new52 joint profile
use_default = false;
% joint or individual profile question
joint_profile_question = questdlg('Joint or individual profiles', ...
'joint profiles?', ...
'joint','individual','joint');
switch joint_profile_question
case 'joint'
do_joint_profile = true;
case 'individual'
do_joint_profile = false;
end
% if not joint profile, 3x or 025x
if ~do_joint_profile
dose_question = questdlg('3x or 025x', ...
'dose?', ...
'3x','025x','3x');
switch dose_question
case '3x'
x3_dose = true;
case '025x'
x3_dose = false;
end
else % if do joint profiles, do flattened or separate?
flatten_question = questdlg('Flattened or separate?', ...
'???',...
'flattened','separate','flattened');
switch flatten_question
case 'flattened'
flattened_joint = true;
case 'separate'
flattened_joint = false;
end
end
% the usual
variables_to_create = {'disc', 'include_DMSO', 'large_group','nn2', 'no_TVN','apply_a_workspace','replicates_random','random_at_end','no_FM', 'no_syto','remove_feature_count', 'remove_heterogeneity_features','debug_mode'};
default_values = [true, false, true, false, false, false, false, false, false, false, false, false, false];
disp("include_DMSO = include DMSO in TVN transformation")
disp("large group = categorize by larger groups instead of finer groups")
disp("nn2 = use second nearest neighbors as well")
disp("no_TVN = do not TVN transform")
disp("apply_a_workspace = get prompted to apply a workspace")
disp("replicates_random = run this with random labels for each replicate")
disp("random_at_end = randomize the labels at the end, after doing backwards select")
disp("no_FM = do not use any FM features (both feature count and foci features)")
disp("no_syto = do not use any syto features (both feature count and foci features)")
disp("remove_feature_count = do not use feature count features")
disp("remove_heterogeneity_features = remove Q1, Q3 and IQR features")
disp("debug_mode = run in debug mode")
target_values = checkboxList("Select Settings", variables_to_create, default_values);
initialize_variables
% we've been using essentially this set so let's have it get
% suggested
suggestion = {'Mer','Amp','Ctax','INH','EMB','ETA','IMI','Van','Cyc','Del',...
'Lev','Mox','Clz','MIT','Olf','Kan','Amk','Cam','Cla','Dox','Gent',...
'Strep','Tet','Lin','Pre','CCCP','Cer','Mon','Nig','Thi','RifT','BDQ',...
'RIF','THL','water','Untreated'};
%% do first run separate to not mess with parfor
% do an initial load of workspace to check number of untreated that can max
% be used
chosen_workspace = joint_workspace_name;
chosen_workspaces = {joint_workspace_name};
load_workspaces
num_untreated_in_wksp = sum(strcmp(final_data_table.DRUG,"Untreated"));
if do_joint_profile
% divide by 2 bc after flattening will be half
num_untreated_in_wksp = num_untreated_in_wksp/2;
end
% if debugging, only do 5 runs
% num rows = number of untreated rows to keep
% if debugging, do a small amount so it can run faster
if debug_mode
maxrun = 5;
num_rows = 32;
number_untreated_used = 80;
else
maxrun = 70;
num_rows = 80;
number_untreated_used = 80;
if num_untreated_in_wksp < num_rows
num_rows = num_untreated_in_wksp;
number_untreated_used = num_untreated_in_wksp;
end
end
runct = 1;
% don't want to just copy paste code again, this is the same content as the
% loop just over separately
% make something to keep track of the variables
all_variable_sets = cell(maxrun,1);
% make something to keep track of the results_tables
all_results_tables = cell(maxrun,1);
tic
bayes_loop_content
%% run the rest of them -> use that cluster bby
% ah oh well we tried parfor is not working bby
for runct = 2:maxrun
% since the exact same as before, just copied into a separate document
bayes_loop_content
end
%% auto save because this will take a while
if do_joint_profile
disp("JOINT PROFILE")
save_title = strcat("./bayes/",user_save_title," joint ", time_string);
else
disp("INDIVIDUAL PROFILE")
if x3_dose
disp("3x dose")
save_title = strcat("./bayes/",user_save_title," 3x ", time_string);
else
disp("025x dose")
save_title = strcat("./bayes/",user_save_title," 025x ", time_string);
end
end
% want to indicate in the title if we had random replciates
if replicates_random
save_title = strcat(save_title, " replicates randomized");
end
if random_at_end
save_title = strcat(save_title, " random at end");
end
% let's save the git info as well
try
git_info = getGitInfo();
git_hash = git_info.hash;
catch
git_hash = "unable to save git information--likely run using a previous commit";
disp("unable to save git information")
end
% if there was an applied drug, save that
if apply
save(save_title,'bayes_struct','all_variable_sets','all_results_tables','git_hash','applied_drug','final_data_table')
else
% save a sample final data table (even if it's just from last run)
save(save_title,'bayes_struct','all_variable_sets','all_results_tables','git_hash','final_data_table')
end
disp(strcat("Saved bayes_struct as ", save_title))
diary off
toc
\ No newline at end of file
This diff is collapsed.
%% this works for combining any columns from a drug-drug or drug-moa table
% instead of pulling one column from multiple figures, just pull one column
% from each data set and make the figure all at once. better for saving
% space and makes things feel less weird
% if it's the first run, generate 'firstrun' var so that a table can be
% created
if ~exist('firstrun','var')
firstrun = true;
end
% first thing to do is run create_connectivity_map
create_connectivity_map
%% Recreate figure S8 B
% just do this part the first time
%blank_table = var_to_add;
% comment out if not doing a drug table
%blank_table_drug = var_to_add_drug;
%% fig 8
% streamlined it past this but want to keep this and just have it be out of
% the way for archival purposess
while false
% filename is bayews_file
valid_name = genvarname(bayes_file);
%two_way_table_bi_moa_pct
% find the variable with 6h
%target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,"6h");
%target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,"mox");
%target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,"beda");
%target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,"rifamp");
target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,"d6");
% grab that var
var_to_add = two_way_table_bi_moa_pct(:,target_var);
drug_name = var_to_add.Properties.VariableNames{1};
end
%% now need to do this for fig 4 -- this has both MoA and drug so need
% this is also the section for the stonybrook part
% to also get symmetric_table
search_for = "d702";
is_low_dose = contains(bayes_file,"025x");
is_high_dose = contains(bayes_file,"3x");
is_joint = contains(bayes_file,"joint");
if is_low_dose
suffix = "_LD";
elseif is_high_dose
suffix = "_HD";
elseif is_joint
suffix = "_JP";
else
suffix = "_NA";
end
% filename is bayes_file
valid_name = genvarname(bayes_file);
target_var = contains(two_way_table_bi_moa_pct.Properties.VariableNames,search_for);
target_var_drug = contains(symmetric_table.Properties.VariableNames,search_for);
% grab that var
var_to_add = two_way_table_bi_moa_pct(:,target_var);
% remove dummy final var that was used to keep the applied drug on the
% bottom (if it exists)
if any(contains(var_to_add.Properties.RowNames,"zzz"))
var_to_add('zzz',:) = [];
end
% need to rename with JP or HD or LD
drug_name = var_to_add.Properties.VariableNames{1};
var_to_add.Properties.VariableNames{drug_name} = char(strcat(drug_name,suffix));
% for drug, remove the row value from the table
symmetric_table(drug_name,:) = [];
var_to_add_drug = symmetric_table(:,target_var_drug);
var_to_add_drug.Properties.VariableNames{drug_name} = char(strcat(drug_name,suffix));
%% add it to the list
if firstrun
% if it's the first run, create the table
% just do this part the first time
blank_table = var_to_add;
% comment out if not doing a drug table
blank_table_drug = var_to_add_drug;
% future runs should go to the false sections
firstrun = false;
else
% all subsequent runs want to add to the table
blank_table = horzcat(blank_table,var_to_add);
% comment out if not doing a drug table
blank_table_drug = horzcat(blank_table_drug,var_to_add_drug);
end
%% save the workspsace
%save('workspace_for_fig_s8b.mat','blank_table')
%save('workspace_for_fig_4.mat','blank_table')
save('workspace_for_fig_S7.mat','blank_table')
%save('workspace_for_fig_4_drug.mat','blank_table_drug')
save('workspace_for_fig_S7.mat','blank_table_drug','blank_table')
%% when ready to put into plots
% have a while false there just so that these don't auto-run
% new map -> parula plus some purple
parula_plus = parula;
parula_plus = parula_plus(6:end,:);
% need to add purple color to beginning -> fade from first blue color to a
% purple color
first_blue = parula_plus(1,:);
purple_hue = [59/255 14/255 101/255];
vec = [0;100];
raw = [first_blue; purple_hue];
N = 14;
blue_to_purple = interp1(vec,raw,linspace(100,0,N),'pchip');
parula_plus = [blue_to_purple; parula_plus];
while false
make_connectivity_heatmap(blank_table,false);
xtickangle(90)
colormap(parula_plus)
%% drug connectivity heatmap
make_connectivity_heatmap(blank_table_drug,false);
xtickangle(90)
colormap(parula_plus)
end
\ No newline at end of file
function node_colors = assign_knn_color(knn_digraph,color_struct,all_categories)
% will use structures color_struct and all_categories to assign colors for
% the nodes of knn_digraph to color the graph in a more readable manner
% all of the fields in all_categories must be named the same as the
% structures in color_struct for this to work (or things will just get
% colored as other often)
node_colors = [];
% get all of the different categories in all_categories
% we want this and not the categories in color_struct because
% color_struct is the same regardless if we are using large_group or
% not whereas all_categories will change
category_fields = fieldnames(all_categories)';
for row = 1:height(knn_digraph.Nodes)
% counted variable keeps track and allows things to be put in the
% "other" category
counted = false;
% get current node name
name = table2array(knn_digraph.Nodes(row,1));
% loop through all possible moa category fields
for j = category_fields
moa_category = j{1};
category_list = all_categories.(moa_category);
if ismember(name,category_list)
node_colors = [node_colors; color_struct.(moa_category)];
% set counted to true makes sure we won't label this as
% other since it was selected at some point
counted = true;
end
end
% if was not in any of the categoires, mark it as other
if ~counted
node_colors = [node_colors; color_struct.other];
end
end
end
function drug_moas = change_to_moa(drugList,all_categories)
% INPUT list of drugs
% will read them in and change them to their MoA
% is this like assign MoA? probably
% let me live my life
% all_categories is a struct where ,for example, all_categories.protein has
% an array of all of the different protein drugs
drug_moas = drugList;
category_fields = fieldnames(all_categories)';
% go through and find moa of each drug and neighbor in order to create
% confusion matrix (don't neeed MoA of others and unknowns)
for k = 1:length(drugList)
% get drug
drug1 = drugList(k);
drug1 = drug1{1};
% counted variable keeps track and allows things to be put in the
% "other" category
counted= false;
% loop through each moa category in categories
for j = category_fields
moa_category = j{1};
category_list = all_categories.(moa_category);
% now loop through each drug
if ismember(drug1,category_list)
% go through all their drug indicies
drug_moas(k) = {moa_category};
% set counted to true makes sure we won't label this as
% other since it was selected at some point
counted = true;
end
end
% if was not in any of the categorires, mark it as other
if ~counted
drug_moas(k) = {'other'};
end
end
end
function connectivity_plot(results_table,color_struct,all_categories,map,wbg)
% create connectivity plot from bayes_struct
% needs color_struct and all_categories (probably created in the knn color
% setup section) in order to create node colors
% needs helper function node_colors
% map -> a colormap, default to a cool red one
% wbg -> boolean on whether to create a white background, default false
%% set default vars
% by default, do a grey background
if ~exist('wbg','var')
wbg = false;
end
% create a default colormap
if ~exist('map','var')
vec = [100; 50; 0;];
raw = [ 1 1 1; ...
255/255 104/255 104/255; ...
47/255 0 0];
N = 128;
map = interp1(vec,raw,linspace(100,0,N),'pchip');
end
%% set up for figure
% just store anything in S you wanna use in the callback
% Plot different plots according to slider location.
% some settings
fig_w = 1000;
fig_h = 650;
% figure handle
S.fh = figure('units','pixels',...
'position',[300 600 fig_w fig_h],...
'name','connectivity plot',...
'numbertitle','off');
% S.ax = axes('unit','pix',...
% 'position',[20 80 560 510]);
%% begin connectivity plot
% create digraph
drug_names = results_table.DRUG;
neighbor_names = results_table.NEIGHBOR;
knn_digraph = digraph(drug_names,neighbor_names, ...
results_table.DISTANCE);
% get node colors
node_colors = assign_knn_color(knn_digraph,color_struct,all_categories);
% knn_digraph.Edges.Weight = results_table.DISTANCE;
% create knn plot
subplot(1,3,1:2) % allow more space for connectivity plot than histogram
% want to get a distance but account for when things are zero
scaled_distance = 7*knn_digraph.Edges.Weight./(max(knn_digraph.Edges.Weight));
% plot
S.LN = plot(knn_digraph,'Layout','force',...
'LineWidth',scaled_distance,...
'EdgeColor',[0 0 0],...
'NodeColor',node_colors,...
'ShowArrows','off');
% 'EdgeAlpha',1,...
% if not white background, set a grey background
if ~wbg
set(gca,'Color',[.8 .8 .8]);
end
% save white background variable in callback struct
S.wbg = wbg;
% set up colorbar
% colorbar
% colormap(S.fh, map);
% get min and max distances for colorbar limits
max_dist = max(results_table.DISTANCE);
min_dist = min(results_table.DISTANCE<