ADSP PCA: Difference between revisions
Bradley Monk (talk | contribs) No edit summary |
Bradley Monk (talk | contribs) No edit summary |
||
Line 15: | Line 15: | ||
<br> <br> <br> | <br> <br> <br> | ||
== | ==PCA Code== | ||
---- | ---- | ||
<br><br> | <br><br> | ||
Line 21: | Line 21: | ||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | <syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | ||
% ###################################################################### | % ###################################################################### | ||
%% | %% PRINCIPAL COMPONENTS ANALYSIS | ||
% ###################################################################### | % ###################################################################### | ||
clc; close all; clear; rng('shuffle') | clc; close all; clear; rng('shuffle') | ||
cd(fileparts(which('GENOS.m'))); | cd(fileparts(which('GENOS.m'))); | ||
Line 62: | Line 64: | ||
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL | clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL | ||
Line 188: | Line 184: | ||
%% TAKE THE TOP N NUMBER OF VARIANTS | %% TAKE THE TOP N NUMBER OF VARIANTS | ||
N = | N = 50; | ||
Line 248: | Line 244: | ||
%###################################################################### | |||
%% PCA : PRINCIPAL COMPONENTS ANALYSIS | |||
%###################################################################### | |||
ss = statset('pca') | |||
ss.Display = 'iter'; | |||
ss.MaxIter = 100; | |||
ss.TolFun = 1e4; | |||
ss.TolX = 1e4; | |||
ss.UseParallel = true; | |||
% | % 'NumComponents',5, 'Algorithm','eig', Elapsed time is 70.459438 seconds. | ||
% [PCAC,PCAS,PCAlatent,PCAtsquared,PCAE] = pca( PCAMX' ,'Options',ss); | |||
% [PCAC,PCAS, | |||
% PCAMX(PCAMX>0) = 1; | |||
[PCAC,PCAS,~,~,PCAE] = pca( PCAMX' , 'Options',ss); | |||
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL... | |||
AMX AMXCASE AMXCTRL ADNN PCAMX PCAC PCAS PCAE | |||
clearvars -except ADSP | %% RECONSTRUCT ORIGINAL DATA (OPTIONAL TEST) | ||
% MEAN DEVIATE (CENTER) DATA | |||
mu = mean(PCAMX); | |||
Xi = bsxfun(@minus,PCAMX,mu); | |||
% Reconstruct the centered data | |||
Xj = PCAS*PCAC'; | |||
% Reconstruct original data | |||
PCAMX_REDUX = round(Xj' + mu); | |||
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL... | |||
AMX AMXCASE AMXCTRL ADNN PCAMX PCAC PCAS PCAE PCAMX_REDUX | |||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 292: | Line 300: | ||
== | ==PCA Plots== | ||
---- | ---- | ||
<br><br> | <br><br> | ||
====ALL PCs in 1D==== | |||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | |||
%% PLOT PCA --- ALL PCs IN 1D ------------------------------------- | |||
clc; close all; | |||
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .9 .8],'Color','w'); | |||
hax1 = axes('Position',[.08 .08 .40 .80],'Color','none'); | |||
hax2 = axes('Position',[.56 .08 .40 .80],'Color','none'); | |||
Tx = PHE.COHORTNUM; | |||
Tn = unique(Tx); | |||
Ts = 'Cohort'; | |||
% axes(hax1); colormap(hax1, (prism(max(Tn))) ); | |||
axes(hax1); colormap(hax1, flipud(jet(max(Tn))) ); | |||
ph1 = scatter( (1:numel(PCAC(:,1))).*0+1 , PCAC(:,1),50, Tx,'o'); | |||
hold on | |||
ph2 = scatter( (1:numel(PCAC(:,2))).*0+2 , PCAC(:,2),50, Tx,'o'); | |||
hold on | |||
ph3 = scatter( (1:numel(PCAC(:,3))).*0+3 , PCAC(:,3),900, Tx,'.'); | |||
hold on | |||
ph4 = scatter( (1:numel(PCAC(:,4))).*0+4 , PCAC(:,4),900, Tx,'.'); | |||
hold on | |||
ph5 = scatter( (1:numel(PCAC(:,5))).*0+5 , PCAC(:,5),900, Tx,'.'); | |||
hax1.XLim = [0 6]; | |||
text(1,max(ph1.YData),sprintf(['explains: \n ' num2str(round(PCAE(1),2)) '%% \n']),... | |||
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14); | |||
text(2,max(ph2.YData),sprintf(['explains: \n ' num2str(round(PCAE(2),2)) '%% \n']),... | |||
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14); | |||
text(3,max(ph3.YData),sprintf(['explains: \n ' num2str(round(PCAE(3),2)) '%% \n']),... | |||
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14); | |||
text(4,max(ph4.YData),sprintf(['explains: \n ' num2str(round(PCAE(4),2)) '%% \n']),... | |||
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14); | |||
text(5,max(ph5.YData),sprintf(['explains: \n ' num2str(round(PCAE(5),2)) '%% \n']),... | |||
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14); | |||
ph1 = | xlabel('\bf PCA1 - PCA5');ylabel('\bf Eigenvector Coefficients'); | ||
cb=colorbar('Ticks',unique(Tx),'TickLabels',(unique(Tx)),'Direction','reverse'); | |||
cb.Label.String = Ts; cb.Label.FontSize = 18; cb.Label.Rotation = -90; | |||
cb.Label.VerticalAlignment = 'baseline'; | |||
axes(hax2); colormap(hax2, (prism(max(Tn))) ); | |||
ph1 = scatter( (1:numel(PCAC(:,1))).*0+1 , PCAC(:,1),50, Tx,'o','MarkerEdgeAlpha',.02); | |||
hold on | |||
ph2 = scatter( (1:numel(PCAC(:,2))).*0+2 , PCAC(:,2),50, Tx,'o','MarkerEdgeAlpha',.02); | |||
hold on | |||
ph3 = scatter( (1:numel(PCAC(:,3))).*0+3 , PCAC(:,3),50, Tx,'o','MarkerEdgeAlpha',.02); | |||
hold on | |||
ph4 = scatter( (1:numel(PCAC(:,4))).*0+4 , PCAC(:,4),50, Tx,'o','MarkerEdgeAlpha',.02); | |||
hold on | |||
ph5 = scatter( (1:numel(PCAC(:,5))).*0+5 , PCAC(:,5),50, Tx,'o','MarkerEdgeAlpha',.02); | |||
hax2.XLim = [0 6]; | |||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 326: | Line 390: | ||
==== | ====ALZHEIMER'S STATUS==== | ||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | <syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | ||
%% PLOT | %% PLOT PCA --- CASE-vs-CTRL ------------------------------------------ | ||
close all; | clc; close all; | ||
fh1=figure('Units','normalized','Position',[.05 . | fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w'); | ||
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none'); | |||
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none'); | |||
Tx = PHE.AD; | |||
Tn = unique(Tx); | |||
Ts = 'CASE-vs-CTRL'; | |||
axes(hax1); colormap(hax1, (lines(max(Tn)+1)) ); | |||
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2'); | |||
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse'); | |||
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90; | |||
cb.Label.VerticalAlignment = 'baseline'; | |||
axes(hax2); colormap(hax2, (lines(max(Tn)+1)) ); | |||
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3'); | |||
view([65,20]) | |||
</syntaxhighlight> | </syntaxhighlight> | ||
<big>Top 100 variants</big> | <big>Top 100 variants</big> | ||
[[File: TSNE | [[File: TSNE Case Control.png|800px]] | ||
<big>Top 2000 variants</big> | <big>Top 2000 variants</big> | ||
[[File: TSNE | [[File: TSNE Case Control 2kvars.png|800px]] | ||
==== | ====STUDY COHORT==== | ||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | <syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | ||
%% PLOT | %% PLOT PCA --- COHORT ------------------------------------------ | ||
close all; | clc; close all; | ||
fh1=figure('Units','normalized','Position',[.05 . | fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w'); | ||
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none'); | |||
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none'); | |||
Tx = PHE.COHORTNUM; | |||
Tn = unique(Tx); | |||
Ts = 'Cohort'; | |||
axes(hax1); colormap(hax1, flipud(jet(max(Tn))) ); | |||
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2'); | |||
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse'); | |||
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90; | |||
cb.Label.VerticalAlignment = 'baseline'; | |||
axes(hax2); colormap(hax2, flipud(jet(max(Tn))) ); | |||
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3'); | |||
view([65,20]) | |||
</syntaxhighlight> | </syntaxhighlight> | ||
<big>Top 100 variants</big> | <big>Top 100 variants</big> | ||
[[File: TSNE | [[File: TSNE Study Cohort.png|800px]] | ||
<big>Top 2000 variants</big> | <big>Top 2000 variants</big> | ||
[[File: TSNE | [[File: TSNE Study Cohort 2kvars.png|800px]] | ||
Line 376: | Line 476: | ||
====AGE==== | ====AGE==== | ||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | <syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | ||
%% PLOT | %% PLOT PCA --- AGE ------------------------------------------ | ||
close all; | clc; close all; | ||
fh1=figure('Units','normalized','Position',[.05 . | fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w'); | ||
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none'); | |||
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none'); | |||
AGE = round(PHE.AGE); | AGE = round(PHE.AGE); | ||
[Y,E] = discretize(AGE,8); | |||
for nn = 1:numel(E) | |||
AGE(Y==nn) = E(nn); | |||
end | |||
Tx = AGE(AGE>60); | |||
Tn = unique(Tx); | |||
Ts = 'AGE-STATUS'; | |||
PCAage = PCAC(AGE>60,:); | |||
% | |||
axes(hax1); colormap(hax1, (jet(numel(Tn))) ); | |||
% axes(hax1); colormap(hax1, flipud(jet(max(Tn))) ); | |||
ph1 = scatter(PCAage(:,1), PCAage(:,2),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2'); | |||
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse'); | |||
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90; | |||
cb.Label.VerticalAlignment = 'baseline'; | |||
axes(hax2); colormap(hax2, (jet(numel(Tn))) ); | |||
% | % axes(hax2); colormap(hax2, flipud(jet(max(Tn))) ); | ||
ph2 = scatter3(PCAage(:,1), PCAage(:,2), PCAage(:,3),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3'); | |||
view([65,20]) | |||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 413: | Line 524: | ||
====APOE STATUS==== | ====APOE STATUS==== | ||
<syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | <syntaxhighlight lang="matlab" line start="1" highlight="1" enclose="div"> | ||
%% PLOT | %% PLOT PCA --- APOE-STATUS ------------------------------------------ | ||
close all; | clc; close all; | ||
fh1=figure('Units','normalized','Position',[.05 . | fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w'); | ||
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none'); | |||
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none'); | |||
APOE = unique(PHE.APOE); | |||
PHE.APOEID = PHE.SEX .* 0; | |||
for nn = 1:numel(APOE) | |||
PHE.APOEID(PHE.APOE == APOE(nn)) = nn; | |||
end | |||
Tx = PHE.APOEID; | |||
Tn = unique(Tx); | |||
Ts = 'APOE-STATUS'; | |||
ph1 = | axes(hax1); colormap(hax1, (lines(numel(Tn))) ); | ||
% axes(hax1); colormap(hax1, flipud(jet(max(Tn))) ); | |||
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2'); | |||
cb=colorbar('Ticks',Tn,'TickLabels',APOE,'Direction','reverse'); | |||
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90; | |||
cb.Label.VerticalAlignment = 'baseline'; | |||
axes(hax2); colormap(hax2, (lines(numel(Tn))) ); | |||
% | % axes(hax2); colormap(hax2, flipud(jet(max(Tn))) ); | ||
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.'); | |||
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3'); | |||
view([65,20]) | |||
</syntaxhighlight> | </syntaxhighlight> | ||
Line 445: | Line 570: | ||
Revision as of 04:28, 6 February 2018
t-Distributed Stochastic Neighbor Embedding (tSNE) is a technique like PCA that allows one perform dimensionality reduction for visualization purposes. Supposedly tSNE does better than PCA at revealing clusters in high-dimensional data. Whereas PCA only allows you to visualize two or three components directly against each at the same time -- tSNE uses math magic to coerce a high-dimensional dataset into either a 2D or 3D array.
t-SNE models each multi-dim object against a point on a euclidean surface in such a way that similar features are modeled by nearby point functions and dissimilar features are modeled by distant point functions. It then projects these points onto the plane allowing you visualize, what would effectively be, all the interesting principal component combinations - the ones that yield unique clusters - simultaneously.
Again, this code can be downloaded from the: GENOS GIT
PCA Code
% ######################################################################
%% PRINCIPAL COMPONENTS ANALYSIS
% ######################################################################
clc; close all; clear; rng('shuffle')
cd(fileparts(which('GENOS.m')));
MATDATA = 'ADSPdata.mat';
which(MATDATA)
load(MATDATA)
clearvars -except ADSP
%% CARBON COPY MAIN VARIABLES FROM ADSP.STRUCT
LOCI = ADSP.LOCI(:,1:17);
CASE = ADSP.CASE;
CTRL = ADSP.CTRL;
PHEN = ADSP.PHEN;
clearvars -except ADSP LOCI CASE CTRL PHEN
%###############################################################
%% DETERMINE WHICH PARTICIPANTS TO KEEP
%###############################################################
PHE = PHEN(PHEN.TOTvars>14000,:);
PHECASE = PHE(PHE.AD==1,:);
PHECTRL = PHE(PHE.AD==0,:);
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL
%###############################################################
%% COUNT NUMBER OF VARIANTS PER LOCI
%###############################################################
% The varsum() function will go through each known variant loci
% and check whether anyone's SRR ID from your subset of IDs match
% all known SRR IDs for that loci. It will then sum the total
% number of alleles (+1 for hetzy-alt, +2 for homzy-alt) for each
% loci and return the totals.
[CASEN, CTRLN] = varsum(CASE, PHECASE.SRR, CTRL, PHECTRL.SRR);
% SAVE COUNTS AS NEW TABLE COLUMNS
LOCI.CASEREFS = numel(PHECASE.SRR)*2-CASEN;
LOCI.CTRLREFS = numel(PHECTRL.SRR)*2-CTRLN;
LOCI.CASEALTS = CASEN;
LOCI.CTRLALTS = CTRLN;
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL
%###############################################################
%% COMPUTE FISHER'S P-VALUE
%###############################################################
% COMPUTE FISHERS STATISTICS FOR THE TRAINING GROUP
[FISHP, FISHOR] = fishp_mex(LOCI.CASEREFS,LOCI.CASEALTS,...
LOCI.CTRLREFS,LOCI.CTRLALTS);
LOCI.FISHPS = FISHP;
LOCI.FISHORS = FISHOR;
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL
%% MAKE LATEST COUNTS THE MAIN TABLE STATS
LOCI.CASEREF = LOCI.CASEREFS;
LOCI.CTRLREF = LOCI.CTRLREFS;
LOCI.CASEALT = LOCI.CASEALTS;
LOCI.CTRLALT = LOCI.CTRLALTS;
LOCI.FISHP = LOCI.FISHPS;
LOCI.FISHOR = LOCI.FISHORS;
%% SORT VARIANT LOCI TABLE BY FISHER P-VALUE
[X,i] = sort(LOCI.FISHP);
LOCI = LOCI(i,:);
CASE = CASE(i);
CTRL = CTRL(i);
LOCI.VID = (1:size(LOCI,1))';
LOCI.GENE = string(LOCI.GENE);
clc; clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL
disp(LOCI(1:9,:))
%% STORE VARIABLES FOR PCA/TSNE AS 'AMX'
AMX = LOCI;
AMXCASE = CASE;
AMXCTRL = CTRL;
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL
%% FILTER VARIANTS BASED ALT > REF
PASS = (AMX.CASEREF > AMX.CASEALT./1.5) | (AMX.CTRLREF > AMX.CTRLALT./1.5);
sum(~PASS)
AMX = AMX(PASS,:);
AMXCASE = AMXCASE(PASS);
AMXCTRL = AMXCTRL(PASS);
AMX.VID = (1:size(AMX,1))';
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL
%% TAKE THE TOP N NUMBER OF VARIANTS
N = 50;
AMX = AMX(1:N,:);
AMXCASE = AMXCASE(1:N);
AMXCTRL = AMXCTRL(1:N);
AMX.VID = (1:size(AMX,1))';
fprintf('\n %.0f final loci count \n\n',size(AMX,1))
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL
%% MAKE RECTANGLE NN VARIANT MATRIX
[ADNN, caMX, coMX] = varmx(AMX,AMXCASE,AMXCTRL,PHE);
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL ADNN
%% RANDOMIZE ADNN AND REORDER PHE TO MATCH ADNN
ADL = ADNN(1,:);
ADN = ADNN(2:end,:);
i = randperm(size(ADN,1));
ADN = ADN(i,:);
ADNN = [ADL;ADN];
[i,j] = ismember(PHE.SRR, ADN(:,1) );
PHE.USED = i;
PHE.ORDER = j;
PHE = PHE(PHE.USED,:);
PHE = sortrows(PHE,'ORDER');
PCAMX = ADNN(2:end,4:end);
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL ADNN PCAMX
%######################################################################
%% PCA : PRINCIPAL COMPONENTS ANALYSIS
%######################################################################
ss = statset('pca')
ss.Display = 'iter';
ss.MaxIter = 100;
ss.TolFun = 1e4;
ss.TolX = 1e4;
ss.UseParallel = true;
% 'NumComponents',5, 'Algorithm','eig', Elapsed time is 70.459438 seconds.
% [PCAC,PCAS,PCAlatent,PCAtsquared,PCAE] = pca( PCAMX' ,'Options',ss);
% PCAMX(PCAMX>0) = 1;
[PCAC,PCAS,~,~,PCAE] = pca( PCAMX' , 'Options',ss);
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL ADNN PCAMX PCAC PCAS PCAE
%% RECONSTRUCT ORIGINAL DATA (OPTIONAL TEST)
% MEAN DEVIATE (CENTER) DATA
mu = mean(PCAMX);
Xi = bsxfun(@minus,PCAMX,mu);
% Reconstruct the centered data
Xj = PCAS*PCAC';
% Reconstruct original data
PCAMX_REDUX = round(Xj' + mu);
clearvars -except ADSP LOCI CASE CTRL PHEN PHE PHECASE PHECTRL...
AMX AMXCASE AMXCTRL ADNN PCAMX PCAC PCAS PCAE PCAMX_REDUX
PCA Plots
ALL PCs in 1D
%% PLOT PCA --- ALL PCs IN 1D -------------------------------------
clc; close all;
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .9 .8],'Color','w');
hax1 = axes('Position',[.08 .08 .40 .80],'Color','none');
hax2 = axes('Position',[.56 .08 .40 .80],'Color','none');
Tx = PHE.COHORTNUM;
Tn = unique(Tx);
Ts = 'Cohort';
% axes(hax1); colormap(hax1, (prism(max(Tn))) );
axes(hax1); colormap(hax1, flipud(jet(max(Tn))) );
ph1 = scatter( (1:numel(PCAC(:,1))).*0+1 , PCAC(:,1),50, Tx,'o');
hold on
ph2 = scatter( (1:numel(PCAC(:,2))).*0+2 , PCAC(:,2),50, Tx,'o');
hold on
ph3 = scatter( (1:numel(PCAC(:,3))).*0+3 , PCAC(:,3),900, Tx,'.');
hold on
ph4 = scatter( (1:numel(PCAC(:,4))).*0+4 , PCAC(:,4),900, Tx,'.');
hold on
ph5 = scatter( (1:numel(PCAC(:,5))).*0+5 , PCAC(:,5),900, Tx,'.');
hax1.XLim = [0 6];
text(1,max(ph1.YData),sprintf(['explains: \n ' num2str(round(PCAE(1),2)) '%% \n']),...
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14);
text(2,max(ph2.YData),sprintf(['explains: \n ' num2str(round(PCAE(2),2)) '%% \n']),...
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14);
text(3,max(ph3.YData),sprintf(['explains: \n ' num2str(round(PCAE(3),2)) '%% \n']),...
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14);
text(4,max(ph4.YData),sprintf(['explains: \n ' num2str(round(PCAE(4),2)) '%% \n']),...
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14);
text(5,max(ph5.YData),sprintf(['explains: \n ' num2str(round(PCAE(5),2)) '%% \n']),...
'Interpreter','tex','HorizontalAlignment','center','VerticalAlignment','bottom','FontSize',14);
xlabel('\bf PCA1 - PCA5');ylabel('\bf Eigenvector Coefficients');
cb=colorbar('Ticks',unique(Tx),'TickLabels',(unique(Tx)),'Direction','reverse');
cb.Label.String = Ts; cb.Label.FontSize = 18; cb.Label.Rotation = -90;
cb.Label.VerticalAlignment = 'baseline';
axes(hax2); colormap(hax2, (prism(max(Tn))) );
ph1 = scatter( (1:numel(PCAC(:,1))).*0+1 , PCAC(:,1),50, Tx,'o','MarkerEdgeAlpha',.02);
hold on
ph2 = scatter( (1:numel(PCAC(:,2))).*0+2 , PCAC(:,2),50, Tx,'o','MarkerEdgeAlpha',.02);
hold on
ph3 = scatter( (1:numel(PCAC(:,3))).*0+3 , PCAC(:,3),50, Tx,'o','MarkerEdgeAlpha',.02);
hold on
ph4 = scatter( (1:numel(PCAC(:,4))).*0+4 , PCAC(:,4),50, Tx,'o','MarkerEdgeAlpha',.02);
hold on
ph5 = scatter( (1:numel(PCAC(:,5))).*0+5 , PCAC(:,5),50, Tx,'o','MarkerEdgeAlpha',.02);
hax2.XLim = [0 6];
Top 100 variants Error creating thumbnail: File missing
Top 2000 variants Error creating thumbnail: File missing
ALZHEIMER'S STATUS
%% PLOT PCA --- CASE-vs-CTRL ------------------------------------------
clc; close all;
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w');
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none');
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none');
Tx = PHE.AD;
Tn = unique(Tx);
Ts = 'CASE-vs-CTRL';
axes(hax1); colormap(hax1, (lines(max(Tn)+1)) );
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse');
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90;
cb.Label.VerticalAlignment = 'baseline';
axes(hax2); colormap(hax2, (lines(max(Tn)+1)) );
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3');
view([65,20])
Top 100 variants Error creating thumbnail: File missing
Top 2000 variants Error creating thumbnail: File missing
STUDY COHORT
%% PLOT PCA --- COHORT ------------------------------------------
clc; close all;
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w');
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none');
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none');
Tx = PHE.COHORTNUM;
Tn = unique(Tx);
Ts = 'Cohort';
axes(hax1); colormap(hax1, flipud(jet(max(Tn))) );
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse');
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90;
cb.Label.VerticalAlignment = 'baseline';
axes(hax2); colormap(hax2, flipud(jet(max(Tn))) );
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3');
view([65,20])
Top 100 variants Error creating thumbnail: File missing
Top 2000 variants Error creating thumbnail: File missing
AGE
%% PLOT PCA --- AGE ------------------------------------------
clc; close all;
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w');
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none');
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none');
AGE = round(PHE.AGE);
[Y,E] = discretize(AGE,8);
for nn = 1:numel(E)
AGE(Y==nn) = E(nn);
end
Tx = AGE(AGE>60);
Tn = unique(Tx);
Ts = 'AGE-STATUS';
PCAage = PCAC(AGE>60,:);
axes(hax1); colormap(hax1, (jet(numel(Tn))) );
% axes(hax1); colormap(hax1, flipud(jet(max(Tn))) );
ph1 = scatter(PCAage(:,1), PCAage(:,2),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');
cb=colorbar('Ticks',Tn,'TickLabels',Tn,'Direction','reverse');
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90;
cb.Label.VerticalAlignment = 'baseline';
axes(hax2); colormap(hax2, (jet(numel(Tn))) );
% axes(hax2); colormap(hax2, flipud(jet(max(Tn))) );
ph2 = scatter3(PCAage(:,1), PCAage(:,2), PCAage(:,3),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3');
view([65,20])
Top 100 variants Error creating thumbnail: File missing
Top 2000 variants Error creating thumbnail: File missing
APOE STATUS
%% PLOT PCA --- APOE-STATUS ------------------------------------------
clc; close all;
fh1=figure('Units','normalized','OuterPosition',[.02 .06 .97 .8],'Color','w');
hax1 = axes('Position',[.05 .07 .40 .85],'Color','none');
hax2 = axes('Position',[.52 .07 .40 .85],'Color','none');
APOE = unique(PHE.APOE);
PHE.APOEID = PHE.SEX .* 0;
for nn = 1:numel(APOE)
PHE.APOEID(PHE.APOE == APOE(nn)) = nn;
end
Tx = PHE.APOEID;
Tn = unique(Tx);
Ts = 'APOE-STATUS';
axes(hax1); colormap(hax1, (lines(numel(Tn))) );
% axes(hax1); colormap(hax1, flipud(jet(max(Tn))) );
ph1 = scatter(PCAC(:,1), PCAC(:,2),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');
cb=colorbar('Ticks',Tn,'TickLabels',APOE,'Direction','reverse');
cb.Label.String = Ts; cb.Label.FontSize = 14; cb.Label.Rotation = -90;
cb.Label.VerticalAlignment = 'baseline';
axes(hax2); colormap(hax2, (lines(numel(Tn))) );
% axes(hax2); colormap(hax2, flipud(jet(max(Tn))) );
ph2 = scatter3(PCAC(:,1), PCAC(:,2), PCAC(:,3),500, Tx,'.');
xlabel('\bf PCA1');ylabel('\bf PCA2');zlabel('\bf PCA3');
view([65,20])
Top 100 variants Error creating thumbnail: File missing
Top 2000 variants Error creating thumbnail: File missing
Additional Genomics Analyses