[atguigu@hadoop102 software]$ tar -zxvf elasticsearch-5.2.2.tar.gz -C /opt/module/
[atguigu@hadoop102 config]$ vim elasticsearch.yml
network.host: hadoop102
http.port: 9200
http.cors.enabled: true
http.cors.allow-origin: "*"
bootstrap.memory_lock: false
bootstrap.system_call_filter: false
[atguigu@hadoop102 elasticsearch-5.2.2]$ sudo vim /etc/security/limits.conf
# Add the following
* soft nproc 65536
* hard nproc 65536
* soft nofile 65536
* hard nofile 65536
[atguigu@hadoop102 elasticsearch-5.2.2]$ sudo vim /etc/sysctl.conf
# Add
vm.max_map_count=655360
[atguigu@hadoop102 elasticsearch-5.2.2]$ sudo vim /etc/security/limits.d/90-np
# Modify the configuration
* soft nproc 2048
[atguigu@hadoop102 elasticsearch-5.2.2]$ sudo sysctl -p
[atguigu@hadoop102 elasticsearch-5.2.2]$ su root
[root@hadoop102 elasticsearch-5.2.2]# reboot
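After logging back in, a minimal check (illustrative commands) confirms the new limits and kernel setting are in effect:
sysctl vm.max_map_count    # expect vm.max_map_count = 655360
ulimit -n                  # expect 65536 (open files)
ulimit -u                  # expect 65536 (max user processes)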
[atguigu@hadoop102 elasticsearch-5.2.2]$ nohup /opt/module/elasticsearch-5.2.2/bin/elasticsearch &
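Before creating the index, you can optionally confirm the node is up; a plain GET against the HTTP port configured above should return the cluster name and version information:
curl http://hadoop102:9200/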
[atguigu@hadoop102 ~]$ curl -XPUT http://hadoop102:9200/griffin -d '
{
  "aliases": {},
  "mappings": {
    "accuracy": {
      "properties": {
        "name": {
          "fields": {
            "keyword": {
              "ignore_above": 256,
              "type": "keyword"
            }
          },
          "type": "text"
        },
        "tmst": {
          "type": "date"
        }
      }
    }
  },
  "settings": {
    "index": {
      "number_of_replicas": "2",
      "number_of_shards": "5"
    }
  }
}'
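An optional follow-up check that the griffin index and its accuracy mapping were created:
curl -XGET 'http://hadoop102:9200/griffin?pretty'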
[atguigu@hadoop102 hadoop-2.7.2]$ sbin/start-dfs.sh
[atguigu@hadoop103 hadoop-2.7.2]$ sbin/start-yarn.sh
[atguigu@hadoop102 module]$ cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/hive/lib/
[atguigu@hadoop102 conf]$ vim hive-site.xml
# Add the following
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop102:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hadoop102:9083</value>
</property>
</configuration>
[atguigu@hadoop102 hive]$ nohup /opt/module/hive/bin/hive --service metastore &
[atguigu@hadoop102 hive]$ nohup /opt/module/hive/bin/hive --service hiveserver2 &
[atguigu@hadoop102 hive]$ /opt/module/hive/bin/hive
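A minimal smoke test inside the CLI (assumes the metastore service started above is still running):
hive (default)> show databases;
hive (default)> show tables;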
[atguigu@hadoop102 software]$ tar -zxvf spark-2.4.6-bin-hadoop2.7.tgz -C /opt/module/
[atguigu@hadoop102 module]$ mv spark-2.4.6-bin-hadoop2.7/ spark
[atguigu@hadoop102 conf]$ mv spark-defaults.conf.template spark-defaults.conf
[atguigu@hadoop102 conf]$ vim spark-defaults.conf
# Add the following configuration
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop102:9000/spark_directory
[atguigu@hadoop102 conf]$ mv slaves.template slaves
[atguigu@hadoop102 conf]$ vim slaves
hadoop102
hadoop103
hadoop104
[atguigu@hadoop102 conf]$ mv spark-env.sh.template spark-env.sh
[atguigu@hadoop102 conf]$ vim spark-env.sh
# Add the following parameters
YARN_CONF_DIR=/opt/module/hadoop-2.7.2/etc/hadoop
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.retainedApplications=30 -Dspark.history.fs.logDirectory=hdfs://hadoop102:9000/spark_directory"
SPARK_MASTER_HOST=hadoop102
SPARK_MASTER_PORT=7077
[atguigu@hadoop102 spark]$ hadoop fs -mkdir /spark_directory
[atguigu@hadoop102 lib]$ cp /opt/module/hive/lib/datanucleus-*.jar /opt/module/spark/jars/
[atguigu@hadoop102 conf]$ cp /opt/module/hive/conf/hive-site.xml /opt/module/spark/conf/
[atguigu@hadoop102 hadoop]$ vi yarn-site.xml
<!-- Whether to start a thread that checks the physical memory each task is using; if a task exceeds its allocation it is killed. Default is true -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Whether to start a thread that checks the virtual memory each task is using; if a task exceeds its allocation it is killed. Default is true -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
[atguigu@hadoop102 conf]$ xsync /opt/module/hadoop-2.7.2/etc/hadoop/yarn-site.xml
[atguigu@hadoop102 conf]$ xsync /opt/module/spark
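Optionally, the standalone master/workers and the history server configured in spark-env.sh can be brought up with Spark's standard scripts (paths assume the layout above); the history UI should then be reachable at http://hadoop102:18080:
/opt/module/spark/sbin/start-all.sh
/opt/module/spark/sbin/start-history-server.sh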
[atguigu@hadoop102 spark]$ bin/spark-shell
scala> spark.sql("show databases").show
[atguigu@hadoop102 software]$ unzip livy-server-0.3.0.zip -d /opt/module/
[atguigu@hadoop102 module]$ mv livy-server-0.3.0/ livy
export HADOOP_CONF_DIR=/opt/module/hadoop-2.7.2/etc/hadoop/
export SPARK_HOME=/opt/module/spark/
livy.server.host = hadoop102
livy.spark.master =yarn
livy.spark.deployMode = client
livy.repl.enableHiveContext = true
livy.server.port = 8998
[atguigu@hadoop102 conf]$ sudo vim /etc/profile
#SPARK_HOME
export SPARK_HOME=/opt/module/spark
export PATH=$PATH:$SPARK_HOME/bin
[atguigu@hadoop102 conf]$ source /etc/profile
[atguigu@hadoop102 livy]$ bin/livy-server start
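To verify Livy started, query its REST endpoint on the port configured above (illustrative check); a response like {"from":0,"total":0,"sessions":[]} means the server is ready:
curl http://hadoop102:8998/sessions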
[atguigu@hadoop102 ~]$ mysql -uroot -p123456
mysql> create database quartz;
mysql> use quartz;
mysql> source /opt/software/Init_quartz_mysql_innodb.sql
mysql> show tables;
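An optional sanity check that the Quartz tables were created with the expected QRTZ_ prefix:
mysql> show tables like 'QRTZ%';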
[atguigu@hadoop102 software]$ tar -zxvf apache-maven-3.6.1-bin.tar.gz -C /opt/module/
[atguigu@hadoop102 module]$ mv apache-maven-3.6.1/ maven
[atguigu@hadoop102 module]$ sudo vim /etc/profile
#MAVEN_HOME
export MAVEN_HOME=/opt/module/maven
export PATH=$PATH:$MAVEN_HOME/bin
[atguigu@hadoop102 module]$ source /etc/profile
[atguigu@hadoop102 module]$ mvn -v
[atguigu@hadoop102 maven]$ cd conf
[atguigu@hadoop102 maven]$ vim settings.xml
<!-- Add the Aliyun mirror -->
<mirror>
<id>nexus-aliyun</id>
<mirrorOf>central</mirrorOf>
<name>Nexus aliyun</name>
<url>http://maven.aliyun.com/nexus/content/groups/public</url>
</mirror>
<mirror>
<id>UK</id>
<name>UK Central</name>
<url>http://uk.maven.org/maven2</url>
<mirrorOf>central</mirrorOf>
</mirror>
<mirror>
<id>repo1</id>
<mirrorOf>central</mirrorOf>
<name>Human Readable Name for this Mirror.</name>
<url>http://repo1.maven.org/maven2/</url>
</mirror>
<mirror>
<id>repo2</id>
<mirrorOf>central</mirrorOf>
<name>Human Readable Name for this Mirror.</name>
<url>http://repo2.maven.org/maven2/</url>
</mirror>
[atguigu@hadoop102 ~]$ mkdir .m2
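Maven also reads a per-user ~/.m2/settings.xml; if you prefer not to keep the mirrors only in the global file, the edited settings can be copied there (optional):
cp /opt/module/maven/conf/settings.xml ~/.m2/settings.xml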
[atguigu@hadoop102 software]$ unzip griffin-master.zip -d /opt/module/
[atguigu@hadoop102 ui]$ vim pom.xml
<!-- It will install nodejs and npm -->
<execution>
<id>install node and npm</id>
<goals>
<goal>install-node-and-npm</goal>
</goals>
<configuration>
<nodeVersion>${node.version}</nodeVersion>
<npmVersion>${npm.version}</npmVersion>
<nodeDownloadRoot>http://nodejs.org/dist/</nodeDownloadRoot>
<npmDownloadRoot>http://registry.npmjs.org/npm/-/</npmDownloadRoot>
</configuration>
</execution>
[atguigu@hadoop102 service]$ vim pom.xml
<!--
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>${postgresql.version}</version>
</dependency>
-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
Note: remove the version tag from the mysql-connector-java dependency.
[atguigu@hadoop102 service]$ vim /opt/module/griffin-master/service/src/main/resources/application.properties
# Apache Griffin application name
spring.application.name=griffin_service
# MySQL database configuration
spring.datasource.url=jdbc:mysql://hadoop102:3306/quartz?autoReconnect=true&useSSL=false
spring.datasource.username=root
spring.datasource.password=123456
spring.jpa.generate-ddl=true
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.jpa.show-sql=true
# Hive metastore configuration
hive.metastore.uris=thrift://hadoop102:9083
hive.metastore.dbname=default
hive.hmshandler.retry.attempts=15
hive.hmshandler.retry.interval=2000ms
# Hive cache time
cache.evict.hive.fixedRate.in.milliseconds=900000
# Kafka schema registry, configure as needed
kafka.schema.registry.url=http://hadoop102:8081
# Update job instance state at regular intervals
jobInstance.fixedDelay.in.milliseconds=60000
# Expired time of job instance which is 7 days that is 604800000 milliseconds. Time unit only supports milliseconds
jobInstance.expired.milliseconds=604800000
# schedule predicate job every 5 minutes and repeat 12 times at most
# interval time unit s:second m:minute h:hour d:day, only support these four units
predicate.job.interval=5m
predicate.job.repeat.count=12
# external properties directory location
external.config.location=
# external BATCH or STREAMING env
external.env.location=
# login strategy ("default" or "ldap")
login.strategy=default
# ldap
ldap.url=ldap://hostname:port
ldap.email=@example.com
ldap.searchBase=DC=org,DC=example
ldap.searchPattern=(sAMAccountName={0})
# hdfs default name
fs.defaultFS=
# elasticsearch
elasticsearch.host=hadoop102
elasticsearch.port=9200
elasticsearch.scheme=http
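Since spring.datasource.url points at the quartz database created earlier, it is worth confirming that the service will be able to connect with those credentials (illustrative one-liner):
mysql -h hadoop102 -uroot -p123456 -e "use quartz; show tables;"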
[atguigu@hadoop102 service]$ vim /opt/module/griffin-master/service/src/main/resources/env/env_batch.json
{
  "spark": {
    "log.level": "INFO"
  },
  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 10
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs://hadoop102:9000/griffin/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://hadoop102:9200/griffin/accuracy",
        "connection.timeout": "1m",
        "retry": 10
      }
    }
  ],
  "griffin.checkpoint": []
}
[atguigu@hadoop102 service]$ vim /opt/module/griffin-master/service/src/main/resources/env/env_streaming.json
{
  "spark": {
    "log.level": "WARN",
    "checkpoint.dir": "hdfs:///griffin/checkpoint/${JOB_NAME}",
    "init.clear": true,
    "batch.interval": "1m",
    "process.interval": "5m",
    "config": {
      "spark.default.parallelism": 4,
      "spark.task.maxFailures": 5,
      "spark.streaming.kafkaMaxRatePerPartition": 1000,
      "spark.streaming.concurrentJobs": 4,
      "spark.yarn.maxAppAttempts": 5,
      "spark.yarn.am.attemptFailuresValidityInterval": "1h",
      "spark.yarn.max.executor.failures": 120,
      "spark.yarn.executor.failuresValidityInterval": "1h",
      "spark.hadoop.fs.hdfs.impl.disable.cache": true
    }
  },
  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 100
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs://hadoop102:9000/griffin/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://hadoop102:9200/griffin/accuracy"
      }
    }
  ],
  "griffin.checkpoint": [
    {
      "type": "zk",
      "config": {
        "hosts": "zk:2181",
        "namespace": "griffin/infocache",
        "lock.path": "lock",
        "mode": "persist",
        "init.clear": true,
        "close.clear": false
      }
    }
  ]
}
[atguigu@hadoop102 service]$ vim /opt/module/griffin-master/service/src/main/resources/quartz.properties
org.quartz.scheduler.instanceName=spring-boot-quartz
org.quartz.scheduler.instanceId=AUTO
org.quartz.threadPool.threadCount=5
org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
# If you use postgresql as your database, set this property value to org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
# If you use mysql as your database, set this property value to org.quartz.impl.jdbcjobstore.StdJDBCDelegate
# If you use h2 as your database, it's ok to set this property value to StdJDBCDelegate, PostgreSQLDelegate or others
org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
org.quartz.jobStore.useProperties=true
org.quartz.jobStore.misfireThreshold=60000
org.quartz.jobStore.tablePrefix=QRTZ_
org.quartz.jobStore.isClustered=true
org.quartz.jobStore.clusterCheckinInterval=20000
[atguigu@hadoop102 griffin-master]$ mvn -Dmaven.test.skip=true clean install
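After the build succeeds, the jars Griffin needs are typically left under each module's target directory (exact file names depend on the Griffin version, so treat these paths as illustrative):
ls /opt/module/griffin-master/service/target/*.jar
ls /opt/module/griffin-master/measure/target/*.jar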
Offline E-commerce Data Warehouse (66): Data Quality Monitoring (2), Griffin (3), Installation and Usage (1)
Original article: https://www.cnblogs.com/qiu-hua/p/13747505.html