在Mesos和YARN中,都用到了dominant resource fairness算法(DRF),它不同于hadoop基于slot-based实现的fair scheduler和capacity scheduler,论文阅读:Dominant Resource Fairness: Fair Allocation of
Multiple Resource Types。
考虑在一个包括多种资源类型(主要考虑CPU和MEM)的系统的公平资源分配问题,其中不同用户对资源有不同的需求。为了解决这个问题,伯克利的几位大牛提出了Dominant Resource Fairness(DRF),一种针对不同资源类型的max-min fairness。并且在Mesos的设计和实现中评估了DRF,显示了它可以比slot-based 公平调度算法得到更好的吞吐量。
/** * Makes scheduling decisions by trying to equalize dominant resource usage. * A schedulable‘s dominant resource usage is the largest ratio of resource * usage to capacity among the resource types it is using. */ @Private @Unstable public class DominantResourceFairnessPolicy extends SchedulingPolicy { public static final String NAME = "DRF"; private DominantResourceFairnessComparator comparator = new DominantResourceFairnessComparator(); @Override public String getName() { return NAME; } @Override public byte getApplicableDepth() { return SchedulingPolicy.DEPTH_ANY; } @Override public Comparator<Schedulable> getComparator() { return comparator; } @Override public void computeShares(Collection<? extends Schedulable> schedulables, Resource totalResources) { for (ResourceType type : ResourceType.values()) { ComputeFairShares.computeShares(schedulables, totalResources, type); } } @Override public void initialize(Resource clusterCapacity) { comparator.setClusterCapacity(clusterCapacity); } public static class DominantResourceFairnessComparator implements Comparator<Schedulable> { private static final int NUM_RESOURCES = ResourceType.values().length; private Resource clusterCapacity; public void setClusterCapacity(Resource clusterCapacity) { this.clusterCapacity = clusterCapacity; } @Override public int compare(Schedulable s1, Schedulable s2) { ResourceWeights sharesOfCluster1 = new ResourceWeights(); ResourceWeights sharesOfCluster2 = new ResourceWeights(); ResourceWeights sharesOfMinShare1 = new ResourceWeights(); ResourceWeights sharesOfMinShare2 = new ResourceWeights(); ResourceType[] resourceOrder1 = new ResourceType[NUM_RESOURCES]; ResourceType[] resourceOrder2 = new ResourceType[NUM_RESOURCES]; // Calculate shares of the cluster for each resource both schedulables. calculateShares(s1.getResourceUsage(), clusterCapacity, sharesOfCluster1, resourceOrder1, s1.getWeights()); calculateShares(s1.getResourceUsage(), s1.getMinShare(), sharesOfMinShare1, null, ResourceWeights.NEUTRAL); calculateShares(s2.getResourceUsage(), clusterCapacity, sharesOfCluster2, resourceOrder2, s2.getWeights()); calculateShares(s2.getResourceUsage(), s2.getMinShare(), sharesOfMinShare2, null, ResourceWeights.NEUTRAL); // A queue is needy for its min share if its dominant resource // (with respect to the cluster capacity) is below its configured min share // for that resource boolean s1Needy = sharesOfMinShare1.getWeight(resourceOrder1[0]) < 1.0f; boolean s2Needy = sharesOfMinShare2.getWeight(resourceOrder2[0]) < 1.0f; int res = 0; if (!s2Needy && !s1Needy) { res = compareShares(sharesOfCluster1, sharesOfCluster2, resourceOrder1, resourceOrder2); } else if (s1Needy && !s2Needy) { res = -1; } else if (s2Needy && !s1Needy) { res = 1; } else { // both are needy below min share res = compareShares(sharesOfMinShare1, sharesOfMinShare2, resourceOrder1, resourceOrder2); } if (res == 0) { // Apps are tied in fairness ratio. Break the tie by submit time. res = (int)(s1.getStartTime() - s2.getStartTime()); } return res; } /** * Calculates and orders a resource‘s share of a pool in terms of two vectors. * The shares vector contains, for each resource, the fraction of the pool that * it takes up. The resourceOrder vector contains an ordering of resources * by largest share. So if resource=<10 MB, 5 CPU>, and pool=<100 MB, 10 CPU>, * shares will be [.1, .5] and resourceOrder will be [CPU, MEMORY]. */ void calculateShares(Resource resource, Resource pool, ResourceWeights shares, ResourceType[] resourceOrder, ResourceWeights weights) { shares.setWeight(MEMORY, (float)resource.getMemory() / (pool.getMemory() * weights.getWeight(MEMORY))); shares.setWeight(CPU, (float)resource.getVirtualCores() / (pool.getVirtualCores() * weights.getWeight(CPU))); // sort order vector by resource share if (resourceOrder != null) { if (shares.getWeight(MEMORY) > shares.getWeight(CPU)) { resourceOrder[0] = MEMORY; resourceOrder[1] = CPU; } else { resourceOrder[0] = CPU; resourceOrder[1] = MEMORY; } } } private int compareShares(ResourceWeights shares1, ResourceWeights shares2, ResourceType[] resourceOrder1, ResourceType[] resourceOrder2) { for (int i = 0; i < resourceOrder1.length; i++) { int ret = (int)Math.signum(shares1.getWeight(resourceOrder1[i]) - shares2.getWeight(resourceOrder2[i])); if (ret != 0) { return ret; } } return 0; } } }
原文:http://blog.csdn.net/pelick/article/details/19326865