addpath C:\AllWorkFiles\aaa\Matrix_eQTL\ALL_Work\Matrix_eQTL_Matlab\
clear;clc;
%%
tic;
s = RandStream('mt19937ar','Seed',20120214);
nKeepGene = 2201;
nKeepSnps = 57333;
nKeepCols = 840;
snps = SlicedData;
snps.dataSlices{1} = floor(s.rand( nKeepSnps, nKeepCols)*3);
snps.rowNameSlices{1} = cellstr(num2str((1:nKeepSnps)','Snp_%05d'));
snps.columnNames = cellstr(num2str((1:nKeepCols)','Sample_%03d'));
gene = SlicedData;
gene.dataSlices{1} = s.randn( nKeepGene, nKeepCols);
gene.rowNameSlices{1} = cellstr(num2str((1:nKeepGene)','Gene_%04d'));
gene.columnNames = cellstr(num2str((1:nKeepCols)','Sample_%03d'));
snps_aaa = snps.Clone();
snps_aaa.RowCenter();
[covariates, ~] = eigs((snps_aaa.dataSlices{1})'*snps_aaa.dataSlices{1},10);
clear snps_aaa;
% correct for randomness of eigs
covariates = bsxfun(@times, covariates, sign(covariates(1,:)));
% add random gene mean
gene.dataSlices{1} = bsxfun(@plus, gene.dataSlices{1}, s.randn(nKeepGene,1).^2);
% add signal to the first two genes
gene.dataSlices{1}(1,:) = snps.dataSlices{1}(1,:) + s.randn(1,nKeepCols);
gene.dataSlices{1}(2,:) = (snps.dataSlices{1}(2,:)==1) + s.randn(1,nKeepCols);
% add covariates to gene expression
gene.dataSlices{1} = gene.dataSlices{1} + s.randn(nKeepGene,10) * covariates';
clear s nKeepGene nKeepSnps nKeepCols;
toc;
%% Merlin
tic;
folder = 'Merlin';
if(~exist(folder,'dir'))
mkdir(folder)
end;
[fid, msg] = fopen('Merlin\Testing.dat','w');
if(fid==-1); error(msg); end;
fprintf(fid,'C\tCvrt_%d\n',1:size(covariates,2));
fprintf(fid,'T\t%s\n',gene.rowNameSlices{1}{:});
fprintf(fid,'M\t%s\n',snps.rowNameSlices{1}{:});
fclose(fid);
[fid, msg] = fopen('Merlin\Testing.map','w');
if(fid==-1); error(msg); end;
z = [snps.rowNameSlices{1} num2cell((1:snps.nRows())') ]';
fprintf(fid,'1\t%s\t%d\n',z{:});
fclose(fid);
toAC = [9 'A' 9 'A'; 9 'A' 9 'C';9 'C' 9 'C'];
[fid, msg] = fopen('Merlin\Testing.ped','w');
if(fid==-1); error(msg); end;
for s=1:gene.nCols
fprintf(fid,'%i\t1\t0\t0\t1',s);
fprintf(fid,'\t%f',covariates(s,:));
fprintf(fid,'\t%f',gene.dataSlices{1}(:,s));
z = toAC(snps.dataSlices{1}(:,s)+1,:);
z = z';
fprintf(fid,z(:));
fprintf(fid,'\n');
end;
fclose(fid);
clear ans fid msg folder toAC s z;
toc;
%% Plink
tic;
folder = 'Plink';
if(~exist(folder,'dir'))
mkdir(folder)
end;
[fid, msg] = fopen('Plink\Testing.map','w');
if(fid==-1); error(msg); end;
z = [snps.rowNameSlices{1} num2cell((1:snps.nRows())') ]';
fprintf(fid,'1\t%s\t0\t%d\n',z{:});
fclose(fid);
toAC = [9 'A' 9 'A'; 9 'A' 9 'C';9 'C' 9 'C'];
[fid, msg] = fopen('Plink\Testing.ped','w');
if(fid==-1); error(msg); end;
for s=1:gene.nCols
fprintf(fid,'%i\t1\t0\t0\t1\t0',s);
z = toAC(snps.dataSlices{1}(:,s)+1,:);
z = z';
fprintf(fid,z(:));
fprintf(fid,'\n');
end;
fclose(fid);
[fid, msg] = fopen('Plink\TestingPheno.txt','w');
if(fid==-1); error(msg); end;
for s=1:gene.nCols
fprintf(fid,'%i\t1',s);
fprintf(fid,'\t%f',gene.dataSlices{1}(:,s));
fprintf(fid,'\n');
end;
fclose(fid);
[fid, msg] = fopen('Plink\TestingCvrt.txt','w');
if(fid==-1); error(msg); end;
for s=1:gene.nCols
fprintf(fid,'%i\t1',s);
fprintf(fid,'\t%f',covariates(s,:));
fprintf(fid,'\n');
end;
fclose(fid);
clear fid ans folder msg toAC z s
toc;
%% Rqtl
tic;
folder = 'Rqtl';
if(~exist(folder,'dir'))
mkdir(folder)
end;
[fid, msg] = fopen('Rqtl\Testing_genes.csv','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id');
fprintf(fid,',%d',1:gene.nCols);
fprintf(fid,'\n');
for i=1:gene.nRows
fprintf(fid,'%s',gene.rowNameSlices{1}{i});
fprintf(fid,',%f',gene.dataSlices{1}(i,:));
fprintf(fid,'\n');
end;
fclose(fid);
toAHB = [',A';',H';',B'];
zz = cell(3,snps.nRows);
zz(1,:) = snps.rowNameSlices{1};
zz(2,:) = num2cell(1:snps.nRows);
for i=1:snps.nRows
z = toAHB(snps.dataSlices{1}(i,:)+1,:);
z = z';
zz{3,i} = z(:);
end;
[fid, msg] = fopen('Rqtl\Testing_snps.csv','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id,,');
fprintf(fid,',%d',1:gene.nCols);
fprintf(fid,'\n');
fprintf(fid,'%s,1,%d%s\n',zz{:});
fclose(fid);
clear z zz;
[fid, msg] = fopen('Rqtl\Testing_new_genes.csv','w');
if(fid==-1); error(msg); end;
fprintf(fid,'%s,',gene.rowNameSlices{1}{:});
fprintf(fid,'id\n');
if(fid==-1); error(msg); end;
for s=1:gene.nCols
fprintf(fid,'%f,',gene.dataSlices{1}(:,s));
fprintf(fid,'%d',s);
fprintf(fid,'\n');
end;
fclose(fid);
[fid, msg] = fopen('Rqtl\Testing_new_snps.csv','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id');
fprintf(fid,',%s',snps.rowNameSlices{1}{:});
fprintf(fid,'\n');
fprintf(fid,',%d',1:snps.nRows());
fprintf(fid,'\n');
for s=1:gene.nCols
fprintf(fid,'%d',s);
% fprintf(fid,'\t%s',toAC{snps.dataSlices{1}(:,s)+1});
z = toAHB(snps.dataSlices{1}(:,s)+1,:);
z = z';
fprintf(fid,z(:));
fprintf(fid,'\n');
end;
fclose(fid);
save('Rqtl\Testing_cvrt.txt','covariates','-ascii','-tabs','-double');
clear ans folder fid msg i s toAC toAHB z
toc;
%% Matrix eQTL
tic;
folder = 'Matrix_eQTL';
if(~exist(folder,'dir'))
mkdir(folder)
end;
[fid, msg] = fopen('Matrix_eQTL\Testing_genes.txt','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id');
fprintf(fid,'\t%s',gene.columnNames{:});
fprintf(fid,'\n');
for i=1:gene.nRows
fprintf(fid,'%s',gene.rowNameSlices{1}{i});
fprintf(fid,'\t%f',gene.dataSlices{1}(i,:));
fprintf(fid,'\n');
end;
fclose(fid);
toAC = [9 '0';9 '1';9 '2'];
zz = cell(2,snps.nRows);
zz(1,:) = snps.rowNameSlices{1};
for i=1:snps.nRows
z = toAC(snps.dataSlices{1}(i,:)+1,:);
z = z';
zz{2,i} = z(:);
end;
[fid, msg] = fopen('Matrix_eQTL\Testing_snps.txt','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id');
fprintf(fid,'\t%s',snps.columnNames{:});
fprintf(fid,'\n');
fprintf(fid,'%s%s\n',zz{:});
fclose(fid);
[fid, msg] = fopen('Matrix_eQTL\Testing_cvrt.txt','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id');
fprintf(fid,'\t%s',snps.columnNames{:});
fprintf(fid,'\n');
for i=1:size(covariates,2)
fprintf(fid,'Cvrt_%d',i);
fprintf(fid,'\t%d',covariates(:,i));
fprintf(fid,'\n');
end;
fclose(fid);
clear folder z zz ans fid msg i;
toc;
%% FastMap
tic;
folder = 'FastMap';
if(~exist(folder,'dir'))
mkdir(folder)
end;
[fid, msg] = fopen('FastMap\Testing_genes.txt','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id\tid');
fprintf(fid,'\t%s',gene.columnNames{:});
fprintf(fid,'\n');
for i=1:gene.nRows
fprintf(fid,'%s\t%s',gene.rowNameSlices{1}{i},gene.rowNameSlices{1}{i});
fprintf(fid,'\t%f',gene.dataSlices{1}(i,:));
fprintf(fid,'\n');
end;
fclose(fid);
toAC = [9 '0';9 '1';9 '2'];
zz = cell(2,snps.nRows);
zz(1,:) = num2cell(1:snps.nRows);
for i=1:snps.nRows
z = toAC(snps.dataSlices{1}(i,:)+1,:);
z = z';
zz{2,i} = z(:);
end;
[fid, msg] = fopen('FastMap\Testing_snps.txt','w');
if(fid==-1); error(msg); end;
fprintf(fid,'id\tid');
fprintf(fid,'\t%s',snps.columnNames{:});
fprintf(fid,'\n');
fprintf(fid,'1\t%d\t%s\n',zz{:});
fclose(fid);
clear folder z zz ans fid msg i toAC;
toc;
← Back to Matrix eQTL page.
By Andrey Shabalin