Matrix eQTL, Matlab code generating the test dataset

addpath C:\AllWorkFiles\aaa\Matrix_eQTL\ALL_Work\Matrix_eQTL_Matlab\
clear;clc;
%%
tic;

s = RandStream('mt19937ar','Seed',20120214);

nKeepGene = 2201;
nKeepSnps = 57333;
nKeepCols = 840;

snps = SlicedData;
snps.dataSlices{1} = floor(s.rand( nKeepSnps, nKeepCols)*3);
snps.rowNameSlices{1} = cellstr(num2str((1:nKeepSnps)','Snp_%05d'));
snps.columnNames = cellstr(num2str((1:nKeepCols)','Sample_%03d'));

gene = SlicedData;
gene.dataSlices{1} = s.randn( nKeepGene, nKeepCols);
gene.rowNameSlices{1} = cellstr(num2str((1:nKeepGene)','Gene_%04d'));
gene.columnNames = cellstr(num2str((1:nKeepCols)','Sample_%03d'));

snps_aaa = snps.Clone();
snps_aaa.RowCenter();
[covariates, ~] = eigs((snps_aaa.dataSlices{1})'*snps_aaa.dataSlices{1},10);
clear snps_aaa;

% correct for randomness of eigs
covariates = bsxfun(@times, covariates, sign(covariates(1,:)));

% add random gene mean
gene.dataSlices{1} = bsxfun(@plus, gene.dataSlices{1}, s.randn(nKeepGene,1).^2);

% add signal to the first two genes

gene.dataSlices{1}(1,:) = snps.dataSlices{1}(1,:) + s.randn(1,nKeepCols);
gene.dataSlices{1}(2,:) = (snps.dataSlices{1}(2,:)==1) + s.randn(1,nKeepCols);

% add covariates to gene expression

gene.dataSlices{1} = gene.dataSlices{1} + s.randn(nKeepGene,10) * covariates';

clear s nKeepGene nKeepSnps nKeepCols;
toc;

%% Merlin
tic;
folder = 'Merlin';
if(~exist(folder,'dir'))
    mkdir(folder)
end;

[fid, msg] = fopen('Merlin\Testing.dat','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'C\tCvrt_%d\n',1:size(covariates,2));
    fprintf(fid,'T\t%s\n',gene.rowNameSlices{1}{:});
    fprintf(fid,'M\t%s\n',snps.rowNameSlices{1}{:});
fclose(fid);

[fid, msg] = fopen('Merlin\Testing.map','w');
    if(fid==-1); error(msg); end;
    z = [snps.rowNameSlices{1} num2cell((1:snps.nRows())') ]';
    fprintf(fid,'1\t%s\t%d\n',z{:});
fclose(fid);
    
toAC = [9 'A' 9 'A'; 9 'A' 9 'C';9 'C' 9 'C'];
[fid, msg] = fopen('Merlin\Testing.ped','w');
    if(fid==-1); error(msg); end;
    for s=1:gene.nCols
        fprintf(fid,'%i\t1\t0\t0\t1',s);
        fprintf(fid,'\t%f',covariates(s,:));
        fprintf(fid,'\t%f',gene.dataSlices{1}(:,s));
        z = toAC(snps.dataSlices{1}(:,s)+1,:);
        z = z';
        fprintf(fid,z(:));
        fprintf(fid,'\n');
    end;
fclose(fid);

clear ans fid msg folder toAC s z;
toc;

%% Plink
tic;
folder = 'Plink';
if(~exist(folder,'dir'))
    mkdir(folder)
end;

[fid, msg] = fopen('Plink\Testing.map','w');
    if(fid==-1); error(msg); end;
    z = [snps.rowNameSlices{1} num2cell((1:snps.nRows())') ]';
    fprintf(fid,'1\t%s\t0\t%d\n',z{:});
fclose(fid);

toAC = [9 'A' 9 'A'; 9 'A' 9 'C';9 'C' 9 'C'];
[fid, msg] = fopen('Plink\Testing.ped','w');
    if(fid==-1); error(msg); end;
    for s=1:gene.nCols
        fprintf(fid,'%i\t1\t0\t0\t1\t0',s);
        z = toAC(snps.dataSlices{1}(:,s)+1,:);
        z = z';
        fprintf(fid,z(:));
        fprintf(fid,'\n');
    end;
fclose(fid);

[fid, msg] = fopen('Plink\TestingPheno.txt','w');
    if(fid==-1); error(msg); end;
    for s=1:gene.nCols
        fprintf(fid,'%i\t1',s);
        fprintf(fid,'\t%f',gene.dataSlices{1}(:,s));
        fprintf(fid,'\n');
    end;
fclose(fid);

[fid, msg] = fopen('Plink\TestingCvrt.txt','w');
    if(fid==-1); error(msg); end;
    for s=1:gene.nCols
        fprintf(fid,'%i\t1',s);
        fprintf(fid,'\t%f',covariates(s,:));
        fprintf(fid,'\n');
    end;
fclose(fid);

clear fid ans folder msg toAC z s
toc;

%% Rqtl
tic;
folder = 'Rqtl';
if(~exist(folder,'dir'))
    mkdir(folder)
end;

[fid, msg] = fopen('Rqtl\Testing_genes.csv','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id');
    fprintf(fid,',%d',1:gene.nCols);
    fprintf(fid,'\n');
    for i=1:gene.nRows
        fprintf(fid,'%s',gene.rowNameSlices{1}{i});
        fprintf(fid,',%f',gene.dataSlices{1}(i,:));
        fprintf(fid,'\n');
    end;
fclose(fid);

toAHB = [',A';',H';',B'];
zz = cell(3,snps.nRows);
zz(1,:) = snps.rowNameSlices{1};
zz(2,:) = num2cell(1:snps.nRows);
for i=1:snps.nRows
    z = toAHB(snps.dataSlices{1}(i,:)+1,:);
    z = z';
    zz{3,i} = z(:);
end;
[fid, msg] = fopen('Rqtl\Testing_snps.csv','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id,,');
    fprintf(fid,',%d',1:gene.nCols);
    fprintf(fid,'\n');
    fprintf(fid,'%s,1,%d%s\n',zz{:});
fclose(fid);
clear z zz;

[fid, msg] = fopen('Rqtl\Testing_new_genes.csv','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'%s,',gene.rowNameSlices{1}{:});
    fprintf(fid,'id\n');
    if(fid==-1); error(msg); end;
    for s=1:gene.nCols
        fprintf(fid,'%f,',gene.dataSlices{1}(:,s));
        fprintf(fid,'%d',s);
        fprintf(fid,'\n');
    end;
fclose(fid);

[fid, msg] = fopen('Rqtl\Testing_new_snps.csv','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id');
    fprintf(fid,',%s',snps.rowNameSlices{1}{:});
    fprintf(fid,'\n');
    fprintf(fid,',%d',1:snps.nRows());
    fprintf(fid,'\n');
    for s=1:gene.nCols
        fprintf(fid,'%d',s);
    %     fprintf(fid,'\t%s',toAC{snps.dataSlices{1}(:,s)+1});
        z = toAHB(snps.dataSlices{1}(:,s)+1,:);
        z = z';
        fprintf(fid,z(:));
        fprintf(fid,'\n');
    end;
fclose(fid);

save('Rqtl\Testing_cvrt.txt','covariates','-ascii','-tabs','-double');

clear ans folder fid msg i s toAC toAHB z
toc;

%% Matrix eQTL
tic;
folder = 'Matrix_eQTL';
if(~exist(folder,'dir'))
    mkdir(folder)
end;

[fid, msg] = fopen('Matrix_eQTL\Testing_genes.txt','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id');
    fprintf(fid,'\t%s',gene.columnNames{:});
    fprintf(fid,'\n');
    for i=1:gene.nRows
        fprintf(fid,'%s',gene.rowNameSlices{1}{i});
        fprintf(fid,'\t%f',gene.dataSlices{1}(i,:));
        fprintf(fid,'\n');
    end;
fclose(fid);

toAC = [9 '0';9 '1';9 '2'];
    zz = cell(2,snps.nRows);
    zz(1,:) = snps.rowNameSlices{1};
    for i=1:snps.nRows
        z = toAC(snps.dataSlices{1}(i,:)+1,:);
        z = z';
        zz{2,i} = z(:);
    end;
    [fid, msg] = fopen('Matrix_eQTL\Testing_snps.txt','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id');
    fprintf(fid,'\t%s',snps.columnNames{:});
    fprintf(fid,'\n');
    fprintf(fid,'%s%s\n',zz{:});
fclose(fid);

[fid, msg] = fopen('Matrix_eQTL\Testing_cvrt.txt','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id');
    fprintf(fid,'\t%s',snps.columnNames{:});
    fprintf(fid,'\n');
    for i=1:size(covariates,2)
        fprintf(fid,'Cvrt_%d',i);
        fprintf(fid,'\t%d',covariates(:,i));
        fprintf(fid,'\n');
    end;
fclose(fid);

clear folder z zz ans fid msg i;
toc;

%% FastMap
tic;
folder = 'FastMap';
if(~exist(folder,'dir'))
    mkdir(folder)
end;

[fid, msg] = fopen('FastMap\Testing_genes.txt','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id\tid');
    fprintf(fid,'\t%s',gene.columnNames{:});
    fprintf(fid,'\n');
    for i=1:gene.nRows
        fprintf(fid,'%s\t%s',gene.rowNameSlices{1}{i},gene.rowNameSlices{1}{i});
        fprintf(fid,'\t%f',gene.dataSlices{1}(i,:));
        fprintf(fid,'\n');
    end;
fclose(fid);

toAC = [9 '0';9 '1';9 '2'];
zz = cell(2,snps.nRows);
zz(1,:) = num2cell(1:snps.nRows);
for i=1:snps.nRows
    z = toAC(snps.dataSlices{1}(i,:)+1,:);
    z = z';
    zz{2,i} = z(:);
end;
[fid, msg] = fopen('FastMap\Testing_snps.txt','w');
    if(fid==-1); error(msg); end;
    fprintf(fid,'id\tid');
    fprintf(fid,'\t%s',snps.columnNames{:});
    fprintf(fid,'\n');
    fprintf(fid,'1\t%d\t%s\n',zz{:});
fclose(fid);

clear folder z zz ans fid msg i toAC;
toc;


Back to Matrix eQTL page.

By Andrey Shabalin