%% Predicting Fuel Economy
% In this demo, we use several regression techniques to predict the fuel
% economy of vehicles.
% ---------------------------------
%
% Copyright 2015 The MathWorks, Inc.

%% Load the Excel File of Fuel Economy Data
CarData = importCarTable('2004dat.xlsx');
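
% |importCarTable| is a helper function that accompanies this script (for
% example, generated with the Import Tool); it is not shown here. A minimal
% sketch of such a function, assuming the spreadsheet has variable names in
% its header row, saved as importCarTable.m:
%
%   function CarData = importCarTable(filename)
%   % Read the fuel economy spreadsheet into a table.
%   CarData = readtable(filename);
%   end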

%% Categorical Variables
% Many of the variables represent discrete items, or categories: a car or a
% truck, front wheel or rear wheel drive, etc. To conserve memory and
% accurately classify these, we'll convert them to |categorical| variables.
CarData.Car_Truck = categorical(CarData.Car_Truck);
CarData.Police = categorical(CarData.Police);
CarData.Transmission = categorical(CarData.Transmission);
CarData.Drive = categorical(CarData.Drive);
CarData.AC = categorical(CarData.AC);
CarData.City_Highway = categorical(CarData.City_Highway);

% There are not enough samples of each Manufacturer and Car Line (hundreds
% of unique values), and Year is the same for every vehicle, so remove
% these variables.
CarData.MfrName = [];
CarData.CarLine = [];
CarData.Year = [];

% %% Test with 10% of the Data
% % Several of the techniques below use random numbers. We are going
% % to set the random number generator here to ensure repeatability.
% rng(5)

%% Partition Data for Cross-Validation
% cvpartition helps us create a cross-validation partition for the data. We
% create a test set (10% of the data) and a training set (90% of the data).

% Build the cross-validation partition
c = cvpartition(height(CarData),'holdout');

% Extract the training and test rows
mdlTrain = CarData(training(c),:);
mdlTest = CarData(test(c),:);

% Extract predictors and response
X = CarData;
X.MPG = []; % Remove the response (MPG)
Y = CarData.MPG;

%% Multiple Linear Regression
% First try multiple linear regression

% Fit linear model
modelLR = fitlm(mdlTrain,'ResponseVar', 'MPG');
disp(modelLR)

% Make prediction on test set
yfitLR = predict(modelLR,mdlTest);

% Show Results
showFit(mdlTest.MPG, yfitLR)
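
% |showFit| is a small helper used throughout this demo to compare
% predicted and actual MPG; it is not included in this listing. A minimal
% sketch of what it might do (the plot style and RMSE display are
% assumptions), saved as showFit.m:
%
%   function showFit(yActual, yPredicted)
%   % Scatter predicted vs. actual values, draw the y = x reference line,
%   % and report the root-mean-square error of the fit.
%   figure
%   scatter(yActual, yPredicted, 'filled')
%   hold on
%   plot(xlim, xlim, 'r--')   % perfect-prediction line
%   hold off
%   xlabel('Actual MPG'), ylabel('Predicted MPG')
%   title(sprintf('RMSE = %.2f', sqrt(mean((yActual - yPredicted).^2))))
%   end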

%% Stepwise Linear Regression
% Stepwise linear regression adds and removes terms one at a time, keeping
% the ones that improve the model the most.
modelSW = stepwiselm(mdlTrain,'ResponseVar', 'MPG','Upper','linear');
disp(modelSW)
disp(mdlTrain.Properties.VariableNames(modelSW.VariableInfo.InModel).');

% Make prediction on test set
yfitSW = predict(modelSW,mdlTest);

% Show Results
showFit(mdlTest.MPG, yfitSW)

% %% Support Vector Regression
% % Support vector machine regression fits a model using an
% % epsilon-insensitive loss: errors smaller than a margin are ignored.
% modelsvm = fitrsvm(mdlTrain,'MPG');
% disp(modelsvm)
%
% % Make prediction on test set
% yfitsvm = predict(modelsvm,mdlTest);
%
% % Show Results
% showFit(mdlTest.MPG, yfitsvm)

%% Regression Trees: Train the Tree
% In many cases, the form of the relationship between predictors and a
% response is unknown. Decision trees offer a nonparametric alternative
% for regression.
t = fitrtree(mdlTrain,'MPG');
t2 = prune(t,'level',250);

% view(t2); % textual
view(t2,'mode','graph'); % as a tree

% Regression Trees: Evaluate the Tree
yfitT = predict(t, mdlTest);

% Show Results
showFit(mdlTest.MPG, yfitT)

%% Bagged Decision Trees
% Bagging stands for bootstrap aggregation. Every tree in the ensemble is
% grown on an independently drawn bootstrap sample of the input data. To
% compute a prediction for the ensemble of trees, fitensemble
% takes an average of the predictions from the individual trees. Ensemble
% techniques such as bagging combine many weak learners to produce a
% strong learner.
%
% To use default values:
%
% tbfit = fitensemble(Xtrain,Ytrain,'Bag',100,'tree','type','regression');
%
% To determine how many trees to use in your ensemble:
%
% treeLoss = oobLoss(tbfit,'mode','cumulative')
% plot(1:length(treeLoss),treeLoss)
%
% oobLoss (out-of-bag regression error) computes the MSE versus the number
% of grown trees. You can use a similar technique to find the best minimum
% leaf size.
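%
% A rough sketch of that leaf-size sweep (not run in this demo): train one
% bagged ensemble per candidate MinLeaf value and compare the out-of-bag
% error. The candidate values below are arbitrary choices.
%
% leafSizes = [1 5 10 20 50];
% oobErr = zeros(size(leafSizes));
% for k = 1:numel(leafSizes)
%     tmpl = templateTree('MinLeaf',leafSizes(k));
%     mdl = fitensemble(mdlTrain,'MPG','Bag',100,tmpl,'type','regression');
%     err = oobLoss(mdl,'mode','cumulative');
%     oobErr(k) = err(end);
% end
% plot(leafSizes,oobErr,'-o'), xlabel('MinLeaf'), ylabel('OOB MSE')
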
ttemp = templateTree('MinLeaf',1);
tbfit = fitensemble(mdlTrain,'MPG','Bag',100,ttemp,'type','regression');

% Predict
yfitTB = predict(tbfit,mdlTest);

% Show Results
showFit(mdlTest.MPG, yfitTB)

%% Bagged Decision Trees: Predictor Importance
% Predictor importance offers insight into the relative importance of each
% predictor in the model. It is calculated by summing changes in
% the mean squared error (MSE) due to splits on every predictor and
% dividing the sum by the number of branch nodes.
figure
pI = predictorImportance(tbfit);
barh(pI)
set(gca,'YTick',1:numel(X.Properties.VariableNames));
set(gca,'YTickLabel',strrep(X.Properties.VariableNames,'_','\_'));
xlabel('Predictor Importance')

%% Sequential Feature Selection
% Sequential feature selection selects a subset of features from the data
% matrix X that best predict the data in Y by sequentially selecting
% features until there is no improvement in prediction. We are using
% 3-fold cross-validation, so we can use the whole data set here without
% having to split it into a training set and a test set. Normally you
% might want to use more folds, but we are keeping it small for demo
% purposes. We can then see which features to keep in our model.

% The data needs to be numeric
Xdummy = dummytable(X);
XNumeric = table2array(Xdummy);
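
% |dummytable| is a helper that is not part of MATLAB and is not shown
% here; it expands each categorical variable into numeric indicator
% (dummy) columns so the table can be converted to a matrix. A rough
% sketch of such a function (the details are assumptions), saved as
% dummytable.m:
%
%   function T = dummytable(T)
%   % Replace each categorical variable with dummy-coded numeric columns.
%   vars = T.Properties.VariableNames;
%   for k = 1:numel(vars)
%       if iscategorical(T.(vars{k}))
%           d = dummyvar(T.(vars{k}));   % one indicator column per category
%           names = matlab.lang.makeValidName( ...
%               strcat(vars{k}, '_', categories(T.(vars{k})).'));
%           T = [T array2table(d,'VariableNames',names)];
%           T.(vars{k}) = [];            % drop the original categorical
%       end
%   end
%   end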

gcp; % open a pool of workers
opts = statset('display','iter','UseParallel','always','TolFun',1e-2);

% Create a cvpartition object for 3-fold cross-validation
cv = cvpartition(height(X),'k',3);

% Determine important features
fs = sequentialfs(@featureTest,XNumeric,Y,'options',opts,'cv',cv);
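
% |featureTest| is the criterion function handed to sequentialfs; it is
% not shown in this file. sequentialfs expects a function of the form
% crit = fun(XTRAIN,ytrain,XTEST,ytest) that returns a loss to minimize.
% A plausible sketch (the choice of a regression tree as the inner model
% is an assumption), saved as featureTest.m:
%
%   function crit = featureTest(Xtrain, ytrain, Xtest, ytest)
%   % Fit a model on the candidate feature subset and return the
%   % sum of squared errors on the held-out fold.
%   mdl = fitrtree(Xtrain, ytrain);
%   yfit = predict(mdl, Xtest);
%   crit = sum((ytest - yfit).^2);
%   end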

% Display
disp(Xdummy.Properties.VariableNames(fs).')

%% Bagged Decision Trees with New Predictor Set
% We get about the same answer with a smaller set of predictors. That is
% important for computational speed, for avoiding overfitting, and for
% general simplicity. It could yield a more accurate result as well.
ttemp = templateTree('MinLeaf',1);
tbfit = fitensemble(XNumeric(training(c),fs),Y(training(c)),...
    'Bag',100,ttemp,'type','regression');
yfitFinal = predict(tbfit,XNumeric(test(c),fs));

% Show results
showFit(Y(test(c)), yfitFinal)

%% Neural Networks
% Use the app to train a network on _XNumeric_ and _Y_. Then generate the
% script and turn it into a function:
net = trainRegressionNetwork(XNumeric,Y);
yfitNN = net(XNumeric.');

% Show results
showFit(Y,yfitNN.');
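
% |trainRegressionNetwork| is the function produced by wrapping the script
% that the fitting app generates; it is not included here. A condensed
% sketch of what such a function typically contains (the hidden layer size
% and data division ratios are assumptions), saved as
% trainRegressionNetwork.m:
%
%   function net = trainRegressionNetwork(X, Y)
%   % Train a feedforward fitting network. X is observations-by-features,
%   % Y is a column of responses; train expects columns to be samples.
%   net = fitnet(10);                    % 10 hidden neurons
%   net.divideParam.trainRatio = 0.70;
%   net.divideParam.valRatio = 0.15;
%   net.divideParam.testRatio = 0.15;
%   net = train(net, X.', Y.');
%   end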