% The script GeneList.m finds significantly up- and down-regulated genes for each archetype compared to the mean values of all samples. 
% The archetypes must be defined using the script GeneArc.m before this script is applied. 
% Genes that are found significantly up-regulated are saved in a matrix "Genes_high" whereas genes that are significantly down-regulated are saved in a matrix "Genes_low". 
% In addition to this, information for each gene can be extracted from a specified data file including gene annotation, pathways and functional classes belonging to each gene. 
% This information is saved in two matrices called "Genes_high_list" and "Genes_low_list" for up- and down-regulated genes respectively. 
% The log2 expression values of these genes are listed in two matrices called "Genes_value_high" and "Genes_value_low".

% This script reads the workspace variables XC, Xnew and Xtext, which must
% already exist before it is run (see GeneArc.m).

% Ask which archetype (column of XC) the gene lists should be built for.
k = input('State which archetype you wish to extract genes from: ');

[p, q] = size(XC);

% Fail early with a readable message instead of an obscure indexing error
% further down (e.g. when the prompt is answered with Enter or a number
% that is not a column of XC).
if ~isnumeric(k) || ~isscalar(k) || k ~= round(k) || k < 1 || k > q
    error('GeneList:badArchetype', ...
          'The archetype must be an integer between 1 and %d.', q);
end

% Standard deviation of every row of Xnew, computed along dimension 2.
Xstd = std(Xnew,0,2);
% Row-wise cut-offs at -2 and +2 standard deviations. They are only applied
% when use_std_cutoff is set to true below.
XClow = (-2)*Xstd;
XChigh = 2*Xstd;

% Cut-off selection:
%   false (default) - fixed cut-offs: XC(i,k) < -1 is "low", XC(i,k) > 1 is "high"
%   true            - row-wise cut-offs XClow(i) / XChigh(i) (2 * std)
use_std_cutoff = false;
if use_std_cutoff
    low_cutoff = XClow;
    high_cutoff = XChigh;
else
    low_cutoff = -1;
    high_cutoff = 1;
end

% Row indices (column vectors, ascending) of the rows of XC whose value for
% archetype k lies below / above the chosen cut-off.
weakest = find(XC(:,k) < low_cutoff);
strongest = find(XC(:,k) > high_cutoff);

% Entries of Xtext belonging to the selected rows.
Genes_low = Xtext(weakest);
Genes_high = Xtext(strongest);

% Gene search with annotation


% The original files PA1.txt and Pseudomonas_data_selected.csv can also be
% used instead of the two files loaded below.
% NOTE(review): the earlier version of this comment said "Remember to
% uncomment line 61 and 75". Those line numbers are stale; they appear to
% refer to the truncation of the gene names to their first 6 characters
% (id_length below) - confirm before switching files.

% NOTE(review): 's' is passed as the second (delimiter) argument of
% importdata - confirm that this is intended.
B = importdata('PA1SDP.txt','s'); % First column with the gene numbers
DATA = importdata('PseudomonasDataSDPselected.csv'); % Selected data

% Number of leading characters of a gene name that are compared against B.
id_length = 6;

% Additional_*   : positions (within Genes_high / Genes_low) of genes that
%                  are not found in B
% index_*_all    : positions in B of all matches, in order of discovery
% index_value_*  : XC value of the gene that produced each entry of
%                  index_*_all (same length and order as index_*_all)
Additional_high=[];
Additional_low=[];
index_high_all=[];
index_low_all=[];
index_value_high=[];
index_value_low=[];

% Genes with strong expression:
for j=1:length(strongest)
    % The function 'char()' converts cell into string
    string1 = char(Genes_high(j));
    % Keep at most id_length characters; names shorter than that are used
    % as they are instead of raising an index error.
    string1 = string1(1:min(id_length,length(string1)));
    % Positions in B whose entry equals string1 (none, one or several)
    index_high = find(ismember(B,string1));
    index_high = index_high(:);
    if isempty(index_high)
        Additional_high = [Additional_high; j]; % save indexes for entries that are not recognized in B
    end
    index_high_all = [index_high_all; index_high];
    % Store one value per match so that index_value_high stays aligned with
    % index_high_all even when a gene is missing from B or occurs more
    % than once.
    index_value_high = [index_value_high; ...
                        repmat(XC(strongest(j),k),numel(index_high),1)];
end

% Genes with weak expression:
for j=1:length(weakest)
    string2 = char(Genes_low(j));
    string2 = string2(1:min(id_length,length(string2)));
    index_low = find(ismember(B,string2));
    index_low = index_low(:);
    if isempty(index_low)
        Additional_low = [Additional_low; j]; % save indexes for entries that are not recognized in B
    end
    index_low_all = [index_low_all; index_low];
    index_value_low = [index_value_low; ...
                       repmat(XC(weakest(j),k),numel(index_low),1)];
end

% Order the matches by their position in B and apply the same permutation
% to the values.
[valh,indh] = sort(index_high_all);
[vall,indl] = sort(index_low_all);

% Values in the same row order as Genes_high_list / Genes_low_list: first
% the genes found in B (sorted by position in B), then the genes that were
% not found in B.
Genes_value_high = [index_value_high(indh); XC(strongest(Additional_high),k)];
Genes_value_low = [index_value_low(indl); XC(weakest(Additional_low),k)];

% NOTE(review): DATA(...) uses linear indexing, so this presumably expects
% DATA to hold one entry per entry of B - verify against the data file.
Genes_high_list = [DATA(valh);Genes_high(Additional_high)];
Genes_low_list = [DATA(vall);Genes_low(Additional_low)];






