之前运行hadoop的方式是首先编写好程序,然后再将程序打包成jar包,然后上传到服务器中运行。
现在有一种方法:通过本地 IDEA 直接将 jar 包提交到远程集群中运行。
一个简单的例子:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.net.URI;
public class WordCount {

    /**
     * Mapper: splits each input line into words and emits (word, 1).
     * The input key is the byte offset of the line; the value is the line text.
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused across map() calls to avoid per-record allocation (standard MR idiom).
        private static final IntWritable ONE = new IntWritable(1);
        private static final Text WORD = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace rather than a single space, so tabs
            // and consecutive spaces do not produce empty "" tokens that would
            // otherwise be counted as words.
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) { // leading whitespace yields one empty token
                    WORD.set(token);
                    context.write(WORD, ONE);
                }
            }
        }
    }

    /**
     * Reducer: sums the per-word counts and emits (word, total).
     * Also usable as a combiner because integer sum is associative and commutative.
     */
    public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private static final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Configures and submits the WordCount job to a remote YARN cluster
     * directly from the local machine (no manual jar upload needed).
     */
    public static void main(String[] args) throws Exception {
        // Act as the HDFS user the remote cluster expects.
        System.setProperty("HADOOP_USER_NAME", "611");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.237:9000");
        // Required when submitting from Windows to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // Local path of the job jar that will be shipped to the cluster.
        // NOTE(review): "mapred.jar" is the deprecated alias of
        // "mapreduce.job.jar"; it still works on Hadoop 2.x.
        conf.set("mapred.jar", "D:\\MyFile\\实验室项目\\2021大数据项目\\out\\artifacts\\wordCount\\wordCount.jar");
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        // Run the job on YARN instead of the default local runner.
        conf.set("mapreduce.framework.name", "yarn");
        // Fixes: "could only be replicated to 0 nodes instead of minReplication (=1).
        // There are 1 datanode(s) running and 1 node(s) are excluded in this operation".
        conf.set("yarn.resourcemanager.hostname", "master");
        // Let the client reach datanodes by hostname (needed when the datanodes'
        // internal IPs are not routable from the submitting machine).
        conf.set("dfs.client.use.datanode.hostname", "true");

        Job job = Job.getInstance(conf, "wordCount"); // job name shown in the RM UI
        job.setJarByClass(WordCount.class);

        String inputFile = "hdfs://192.168.1.237:9000/test/wordCount.txt";
        String outFile = "hdfs://192.168.1.237:9000/test/output";

        // MapReduce refuses to start if the output directory already exists,
        // so delete it (recursively) up front.
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.237:9000"), conf);
        Path outPath = new Path(outFile);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        job.setMapperClass(WordCountMapper.class);
        // The sum reducer doubles as a combiner, cutting shuffle traffic
        // without changing the final counts.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path(outFile));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
其中有一些设置关键的地方:
然后这个程序需要导入的包包括基本的 hadoop、yarn 的依赖包。具体哪些是必须的我没有逐一验证,这里把所有可能用到的依赖全部加入了,其中有一些可能不是必须的,大家可以自己尝试精简。
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-shuffle</artifactId>
<version>2.7.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>2.7.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>2.7.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-jobclient -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>2.7.7</version>
</dependency>
</dependencies>
在使用这个远程提交的时候,有一些yarn-site.xml和mapred-site.xml必须得配置好:
mapred-site.xml中的一些有关配置:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- 配置的是history的查看地址 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<!-- 配置的是history网页的查看地址 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
yarn-site.xml中的一些有关配置:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 配置yarn的resourcemanager的主机,这个是必须配置的 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- 配置yarn运行过程中产生的记录聚合 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 聚合后的history的查看地址 -->
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs</value>
</property>
如果需要启动 hadoop 的 history 记录服务,必须单独启动:
${HADOOP_HOME}/sbin/mr-jobhistory-daemon.sh start historyserver
可以通过master:8088查看yarn运行的任务和状态
如果想要查看history的,必须开启history server,如上面的命令所示
原文:https://www.cnblogs.com/liulongtao/p/14823405.html