第一个函数是计算熵的函数:
<span style="font-size:18px;">
function result=CEntropy(propertyList)
%CENTROPY Shannon entropy (base 2) of the values in propertyList.
%   result = CEntropy(propertyList) treats every distinct value of
%   propertyList as one class and returns -sum(p.*log2(p)), where p is the
%   relative frequency of each class. An empty input yields 0.
result=0;
% numel (not length) so the frequencies still sum to 1 for non-vector input.
totalLength=numel(propertyList);
itemList=unique(propertyList);
for i=1:numel(itemList)
    % nnz counts the matches directly; no index vector is needed.
    pItem=nnz(propertyList==itemList(i))/totalLength;
    result=result-pItem*log2(pItem);
end
end

function decisionTreeModel=decisionTree(data,label,propertyName)
%DECISIONTREE Build a decision tree by recursive information-gain splits.
%   decisionTreeModel = decisionTree(data,label,propertyName)
%
%   data          numeric matrix, one row per sample, one column per attribute
%   label         class labels, one row per sample
%   propertyName  cell array with one name per column of data
%
%   The result is a struct with two fields:
%     rootNode  struct whose NodeName is a 1x1 cell holding the root
%               attribute name
%     Node      struct array with fields fatherNodeName, EdgeProperty and
%               NodeName, filled in by BuildTree in depth-first order.
%               Node(1) is an all-empty placeholder created by the
%               initialisation below; real nodes start at Node(2).
%
%   The tree is accumulated in the global variables rootNode and Node,
%   which are reset on every call.
global rootNode;
global Node;
if numel(propertyName)~=size(data,2)
    error('decisionTree:sizeMismatch', ...
        'propertyName must contain one name per column of data.');
end
if size(label,1)~=size(data,1)
    error('decisionTree:sizeMismatch', ...
        'label must contain one row per row of data.');
end
rootNode=struct('NodeName',[]);
% Kept as a 1x1 struct (not 0x0) so existing consumers that skip Node(1)
% keep working.
Node=struct('fatherNodeName',[],'EdgeProperty',[],'NodeName',[]);
rootIndex=CalcuteNode(data,label);
% Columns that remain available to the subtrees once the root is used up.
remainingCols=setdiff(1:numel(propertyName),rootIndex);
rootNode.NodeName=propertyName(rootIndex);
propertyName(rootIndex)=[];
rootData=data(:,rootIndex);
sonEdge=unique(rootData);
for i=1:length(sonEdge)
    rowMask=(rootData==sonEdge(i));
    BuildTree(rootNode.NodeName,sonEdge(i),data(rowMask,remainingCols),label(rowMask,:),propertyName);
end
decisionTreeModel.rootNode=rootNode;
decisionTreeModel.Node=Node;
end

function [ output_args ] = BuildTree(fatherNodeName,edge,data,label,propertyName)
%BUILDTREE Append the subtree reached from fatherNodeName via edge.
%   BuildTree(fatherNodeName,edge,data,label,propertyName) appends one
%   entry to the global struct array Node and recurses into its children.
%
%   fatherNodeName  NodeName of the parent node
%   edge            attribute value on the branch leading to this node
%   data, label     samples that reached this node (already filtered)
%   propertyName    names of the columns still present in data
%
%   A node becomes a leaf (NodeName = class label) when all labels agree,
%   or when no attribute is left to split on, in which case the most
%   frequent label is used. The declared output output_args is never
%   assigned; call this function without requesting an output.
global Node;
k=length(Node)+1;
Node(k).fatherNodeName=fatherNodeName;
Node(k).EdgeProperty=edge;
if length(unique(label))==1
    Node(k).NodeName=label(1);
    return;
end
if size(data,2)==0
    % Attributes exhausted but labels still mixed: fall back to the
    % majority class instead of leaving the leaf with an empty name.
    Node(k).NodeName=mode(label(:));
    return;
end
sonIndex=CalcuteNode(data,label);
remainingCols=setdiff(1:numel(propertyName),sonIndex);
Node(k).NodeName=propertyName(sonIndex);
propertyName(sonIndex)=[];
sonData=data(:,sonIndex);
sonEdge=unique(sonData);
for i=1:length(sonEdge)
    rowMask=(sonData==sonEdge(i));
    % Node(k) is re-read from the global on each pass; the recursive calls
    % only append entries after index k, so it is unchanged.
    BuildTree(Node(k).NodeName,sonEdge(i),data(rowMask,remainingCols),label(rowMask,:),propertyName);
end
end

function [NodeIndex]=CalcuteNode(data,label)
%CALCUTENODE Index of the column of data with the largest information gain.
%   NodeIndex = CalcuteNode(data,label) computes, for every column of
%   data, the entropy of label minus the weighted entropy of label within
%   each distinct value of that column, and returns the index of the
%   column with the largest gain (the first one on ties). Returns [] when
%   data has no columns.
baseEntropy=CEntropy(label);
[m,n]=size(data);
EntropyGain=baseEntropy*ones(1,n);
for i=1:n
    pData=data(:,i);
    itemList=unique(pData);
    for j=1:length(itemList)
        rowMask=(pData==itemList(j));
        EntropyGain(i)=EntropyGain(i)-nnz(rowMask)/m*CEntropy(label(rowMask));
    end
    % Uncomment the next line to rank by gain ratio; leave it commented
    % out to rank by plain information gain.
    % NOTE(review): CEntropy(pData) is 0 for a single-valued column, so the
    % division would then produce NaN or Inf.
    % EntropyGain(i)=EntropyGain(i)/CEntropy(pData);
end
[~,NodeIndex]=max(EntropyGain);
end
</span>接下来,测试用主函数:
<span style="font-size:18px;">
% Demo script: build a decision tree for the 14-sample "play golf" data set.
clear;clc;
% Lookup tables for the integer codes; they are not read by the code below.
% NOTE(review): presumably these are the codes used in the attribute
% vectors that follow - confirm against the original data set.
OutlookType=struct('Sunny',1,'Rainy',2,'Overcast',3);
TemperatureType=struct('hot',1,'warm',2,'cool',3);
HumidityType=struct('high',1,'norm',2);
WindyType={'True',1,'False',0};
% One entry per sample; each vector becomes a column of the data matrix.
Outlook=[1,1,3,2,2,2,3,1,1,2,1,3,3,2]';
Temperature=[1,1,1,2,3,3,3,2,3,3,2,2,1,2]';
Humidity=[1,1,1,1,2,2,2,1,2,2,2,1,2,1]';
Windy=[0,1,0,0,0,1,1,0,0,0,1,1,0,1]';
data=[Outlook Temperature Humidity Windy];
% Class label per sample (presumably Yes=1, No=0).
PlayGolf=[0,0,1,1,1,0,1,0,1,1,1,1,1,0]';
% Column names, in the same order as the columns of data.
propertyName={'Outlook','Temperature','Humidity','Windy'};
decisionTreeModel=decisionTree(data,PlayGolf,propertyName);
</span>原文:http://blog.csdn.net/zhangzhengyi03539/article/details/44786951