root/MachineLearningWithMATLAB/Regression_FuelEconomy/dummytable.m @ 11
10 | anderm8 | function Tdummy = dummytable(T)
|
|
% Tdummy = dummytable(T) - convert categorical variables in table to dummy
|
|||
% variables
|
|||
%
|
|||
% This functions takes the categorical variables in a table and converts
|
|||
% them to separate dummy variables with intelligent names. This way they
|
|||
% can be used in the Classification Learner App and the variable names make
|
|||
% sense for feature selection, etc.
|
|||
%
|
|||
% Usage:
|
|||
%
|
|||
% Tdummy = dummytable(T)
|
|||
%
|
|||
% Inputs:
|
|||
%
|
|||
% T: Table with categoricals or categorical variable
|
|||
%
|
|||
% Outputs:
|
|||
%
|
|||
% Tdummy: T with categorical variables turned into dummy variables with
|
|||
% intelligent names
|
|||
%
|
|||
% Example:
|
|||
%
|
|||
% % Simple Table
|
|||
% T = table(rand(10,1),categorical(cellstr('rbbgbgbbgr'.')),...
|
|||
% 'VariableNames',{'Percent','Color'});
|
|||
% disp(T)
|
|||
%
|
|||
% % Turn it into a dummy table
|
|||
% Tdummy = dummytable(T);
|
|||
% disp(Tdummy)
|
|||
%
|
|||
% See Also: dummyvar, table, categorical, classificationLearner
|
|||
% Copyright 2015 The MathWorks, Inc.
|
|||
% Sean de Wolski Apr 13, 2014
|
|||
% Error checking
|
|||
narginchk(1,1)
|
|||
validateattributes(T,{'categorical', 'table'},{},mfilename,'T',1);
|
|||
% If it's a categorical, do out best to convert it to a table with an
|
|||
% intelligent variable name
|
|||
if iscategorical(T)
|
|||
% Try to use existing variable name
|
|||
cname = inputname(1);
|
|||
if isempty(cname)
|
|||
% It's a MATLAB Expression, default to Var1
|
|||
cname = 'Var1';
|
|||
end
|
|||
T = table(T,'VariableNames',{cname});
|
|||
end
|
|||
% Identify categoricals and their names
|
|||
cats = varfun(@iscategorical,T,'OutputFormat','uniform');
|
|||
% Short circuit if there are no categoricals
|
|||
if ~any(cats)
|
|||
Tdummy = T;
|
|||
return
|
|||
end
|
|||
% Store everything in a cell. w will be the total width of the table
|
|||
% with each variable dummyvar'd
|
|||
w = nnz(~cats)+sum(varfun(@(x)numel(categories(x)),T(:,cats),'OutputFormat','uniform'));
|
|||
% Preallocate storage
|
|||
datastorage = cell(1,w);
|
|||
namestorage = cell(1,w);
|
|||
% Engine
|
|||
idx = 0; % Start nowhere in cell
|
|||
for ii = 1:width(T)
|
|||
idx = idx+1;
|
|||
% Loop over table deciding what to do with each variable
|
|||
if cats(ii)
|
|||
% It's a categorical,
|
|||
% Extract it and build keep its categories and dummyvar
|
|||
Tii = T{:,ii};
|
|||
categoriesii = categories(Tii)';
|
|||
ncatii = numel(categoriesii); % How many?
|
|||
% Build dummy var as a row cell with columns in each
|
|||
dvii = num2cell(dummyvar(Tii), 1); % Dummy var then cell
|
|||
% Build names
|
|||
namesii = strcat(T.Properties.VariableNames{ii}, '_', categoriesii);
|
|||
% Insert
|
|||
datastorage(idx:(idx+ncatii-1)) = dvii;
|
|||
namestorage(idx:(idx+ncatii-1)) = namesii;
|
|||
% Increment
|
|||
idx = idx+ncatii-1;
|
|||
else
|
|||
% Extract non categorical into current storage location
|
|||
datastorage{idx} = T{:,ii};
|
|||
namestorage(idx) = T.Properties.VariableNames(ii);
|
|||
end
|
|||
end
|
|||
% Build Tdummy with comma separated list expansion
|
|||
Tdummy = table(datastorage{:},'VariableNames',matlab.lang.makeValidName(namestorage));
|
|||
end
|