算法平台项目/算法平台安装文档/数据探查服务安装文档(成功安装在华为云版).md

@@ -0,0 +1,984 @@
+# Data Exploration Service Installation Guide (Huawei Cloud)
+
+[TOC]
+
+## 1. Overview
+
+### 1.1 Software and versions
+
+1. JDK 1.8
+2. MySQL 5.7.28
+3. Hadoop 3.1.3
+4. Hive 3.1.2
+5. Scala 2.11.6
+6. Spark 2.2.1
+7. Livy 0.7.1
+8. Elasticsearch 5.0.0
+9. Apache Griffin 0.6.0
+
+### 1.2 Installation paths
+
+All installation files are located under /opt/modules/, as shown below: ![image-20220606152613846](图片/image-20220606152613846-16545003762937.png)
+
+
+## 2. Software installation
+
+### 2.1 MySQL configuration
+
+* Create the database and import the officially provided SQL file: Init_quartz_mysql_innodb.sql
+
+1. Create the `quartz` database
+
+```shell
+mysql -u <username> -e "create database quartz" -p
+# e.g.: mysql -u root -e "create database quartz" -p
+```
+
+2. Import the SQL file
+
+```shell
+mysql -u <username> -p quartz < Init_quartz_mysql_innodb.sql
+# e.g.: mysql -u root -p quartz < Init_quartz_mysql_innodb.sql
+```
+
+Note: verify that the database was created and the import succeeded before continuing, and make sure every step completes without errors.
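+
+A quick way to confirm the import worked is to list the tables in the new database; the standard Quartz schema (with the QRTZ_ table prefix that the Griffin quartz.properties later in this guide relies on) should be visible. A minimal check, assuming the same root account as above:
+
+```shell
+# should list QRTZ_JOB_DETAILS, QRTZ_TRIGGERS and the other QRTZ_* tables
+mysql -u root -p -e "SHOW TABLES;" quartz
+```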
+
+### 2.2 Hadoop deployment
+
+Note: Hadoop is normally already installed, so this section only reviews and adjusts its configuration files.
+
+1. Check whether **./etc/hadoop/core-site.xml** contains the following configuration and add it if it is missing. Note: remember to sync the change to every node.
+
+```xml
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+          <!-- ip: replace with the actual NameNode IP -->
+        <value>hdfs://ip:9000</value>  
+    </property>    
+</configuration>
+```
+
+2. Under the Hadoop data directory, i.e. `hadoop-3.1.3/data/hadoop-data`, create the folders nn, dn and snn, then distribute them to the other nodes
+
+   ```shell
+   # create the folders
+   [root@master hadoop-data]# mkdir nn
+   [root@master hadoop-data]# mkdir dn
+   [root@master hadoop-data]# mkdir snn
+   [root@master hadoop-data]# cd ..
+   # distribute: change the IPs and paths to match your environment
+   # copy to node 1
+   [root@master data]# scp -r hadoop-data/ root@10.168.57.11:/opt/modules/hadoop-3.1.3/data/
+   # copy to node 2
+   [root@master data]# scp -r hadoop-data/ root@10.168.57.12:/opt/modules/hadoop-3.1.3/data/
+   ```
+
+3. Check whether **./etc/hadoop/hdfs-site.xml** contains the following configuration and add it if it is missing. Note: remember to sync the change to every node.
+
+   1. Configure the **hdfs-site.xml** file
+
+   ```xml
+   <configuration>
+   <!-- Secondary NameNode host; slave1 is the hostname mapped to node 2's IP -->
+   <property>
+         <name>dfs.namenode.secondary.http-address</name>
+         <value>slave1:50090</value>
+   </property>
+   
+       <property>
+           <name>dfs.namenode.logging.level</name>
+           <value>warn</value>
+       </property>
+       <property>
+           <name>dfs.replication</name>
+           <value>1</value>
+       </property>
+       <!-- nn, dn and snn must be created manually; change the directories to your local paths -->
+   <property>
+           <name>dfs.namenode.name.dir</name>
+           <value>/opt/modules/hadoop-3.1.3/data/hadoop-data/nn</value>
+       </property>
+       <property>
+           <name>dfs.datanode.data.dir</name>
+           <value>/opt/modules/hadoop-3.1.3/data/hadoop-data/dn</value>
+       </property>
+       <property>
+           <name>dfs.namenode.checkpoint.dir</name>
+           <value>/opt/modules/hadoop-3.1.3/data/hadoop-data/snn</value>
+       </property>
+       <property>
+           <name>dfs.webhdfs.enabled</name>
+           <value>true</value>
+       </property>
+       <property>
+           <name>dfs.datanode.use.datanode.hostname</name>
+           <value>false</value>
+       </property>
+       <property>
+           <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+           <value>false</value>
+       </property>
+   </configuration>
+   ```
+
+   2. Distribute hdfs-site.xml to the other nodes
+
+   ```shell
+   # change the IPs and directories to match your environment
+   [root@master hadoop]# scp -r hdfs-site.xml root@10.168.57.12:/opt/modules/hadoop-3.1.3/etc/hadoop/
+    
+   [root@master hadoop]# scp -r hdfs-site.xml root@10.168.57.11:/opt/modules/hadoop-3.1.3/etc/hadoop/
+   ```
+
+4. Modify the Hadoop configuration file /opt/modules/hadoop-3.1.3/etc/hadoop/yarn-site.xml and sync it to all nodes (the yarn-site.xml file is provided in the hadoop folder)
+
+   ```xml
+   <!-- Whether to start a thread that checks the physical memory used by each task and
+   kills any task that exceeds its allocation; default is true -->
+   <property>
+    <name>yarn.nodemanager.pmem-check-enabled</name>
+    <value>false</value>
+   </property>
+   <!-- Whether to start a thread that checks the virtual memory used by each task and
+   kills any task that exceeds its allocation; default is true -->
+   <property>
+    <name>yarn.nodemanager.vmem-check-enabled</name>
+    <value>false</value>
+   </property>
+   ```
+
+5. Start HDFS
+
+   1. Format the NameNode first (needed because configuration files were changed and new data directories were added)
+
+      ```shell
+      hadoop namenode -format
+      ```
+
+   2. Enter the installation directory and start HDFS
+
+     
+      ```shell
+      cd xxxx
+      # start
+      sbin/start-dfs.sh
+      
+      # stop
+      sbin/stop-dfs.sh
+      ```
+
+   After startup, visit http://ip:50070/ ; for Hadoop 3.0 and above (3.1.3 is used here), visit http://ip:9870/ instead. ![image-20220606152716058](图片/image-20220606152716058-16545004380689.png)
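+
+   You can also confirm from the shell that the daemons came up; a minimal check (the exact process list depends on which node you run it on):
+
+   ```shell
+   # NameNode/DataNode (and SecondaryNameNode on slave1) should appear here
+   jps
+   # summary of live DataNodes and HDFS capacity
+   hdfs dfsadmin -report
+   ```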
+
+
+
+6. Start YARN
+
+   ```shell
+   # Note: run this on the node where YARN (the ResourceManager) is deployed
+   # enter the Hadoop installation directory
+   cd xxxx
+   sbin/yarn-daemon.sh start resourcemanager
+   
+   # stop
+   sbin/yarn-daemon.sh stop resourcemanager
+   ```
+
+   After startup, visit http://ip:8088/    ![image-20220606152736734](图片/image-20220606152736734-165450045827211.png)
+
+7. (Optional: not configured on Huawei Cloud) Start the JobHistory server
+
+   ```shell
+   # enter the Hadoop installation directory
+   cd xxxx
+   sbin/mr-jobhistory-daemon.sh start historyserver
+   
+   # stop
+   sbin/mr-jobhistory-daemon.sh stop historyserver
+   ```
+
+   After startup, visit http://ip:19888/jobhistory  ![image-20220606152901770](图片/image-20220606152901770-165450054331613.png)
+
+
+
+### 2.3 Hive deployment
+
+1. Upload apache-hive-3.1.2-bin.tar.gz to the system and extract it into the target directory
+
+   ```shell
+   # extract
+   tar -zxvf /opt/software/apache-hive-3.1.2-bin.tar.gz -C /opt/modules/
+   
+   # rename the directory
+   mv /opt/modules/apache-hive-3.1.2-bin/   /opt/modules/hive-3.1.2
+   ```
+
+2. Check the MySQL installation and create the Hive metastore database
+
+   ```shell
+   # log in to MySQL
+   mysql -u root -p
+   # create the metastore database
+   create database metastore;
+   # exit MySQL
+   quit;
+   # initialize the Hive metastore schema (run from the shell, after hive-site.xml has been configured in step 3)
+   schematool -initSchema -dbType mysql -verbose
+   ```
+
+3. Configure Hive
+
+     a. Upload the MySQL JDBC driver and copy it into Hive's lib directory
+
+   ```shell
+   cp /opt/software/mysql-connector-java5.1.37.jar $HIVE_HOME/lib
+   ```
+
+     b. Create hive-site.xml
+
+   ```shell
+    vim $HIVE_HOME/conf/hive-site.xml
+   ```
+
+   c. Add the following to hive-site.xml (mind the IPs and paths; a prepared copy of this file is included in the installation folder)
+
+   ```xml
+   <?xml version="1.0" encoding="UTF-8" standalone="no"?><?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+   <configuration>
+           <property>
+                   <name>javax.jdo.option.ConnectionURL</name>
+                   <value>jdbc:mysql://10.168.57.10:3306/metastore?useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
+           </property>
+   
+           <property>
+                   <name>javax.jdo.option.ConnectionDriverName</name>
+                   <value>com.mysql.jdbc.Driver</value>
+           </property>
+   
+           <property>
+                   <name>javax.jdo.option.ConnectionUserName</name>
+                   <value>root</value>
+           </property>
+   
+           <property>
+                   <name>javax.jdo.option.ConnectionPassword</name>
+                   <value>root</value>
+           </property>
+   
+           <property>
+                   <name>hive.metastore.schema.verification</name>
+                   <value>false</value>
+           </property>
+   	<property> 
+      	        <name>hive.cli.print.current.db</name>
+   	        <value>true</value>
+   	</property>
+   	<property> 
+   	        <name>hive.cli.print.header</name>
+   	        <value>true</value>
+   	</property>
+       <!-- HiveServer2 settings -->
+   	<property>
+          	        <name>hive.server2.thrift.port</name>
+        		<value>10000</value>
+   	</property>
+   
+       	<property>
+          		<name>hive.server2.thrift.bind.host</name>
+          		<value>10.168.58.10</value>
+        	</property>
+   
+       	<property>
+   		<name>hive.exec.post.hooks</name>
+          		<value>org.apache.atlas.hive.hook.HiveHook</value>
+        	</property>
+   <property>
+   <name>metastore.storage.schema.reader.impl</name>
+   <value>org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader</value>
+   </property>
+   <property>
+        <name>hive.server2.authentication</name>
+        <value>NONE</value>
+   </property>
+   
+   <property>
+      <name>dfs.permissions.enabled</name>
+      <value>false</value>
+   </property>
+   
+   <property>
+        <name>hive.server2.enable.doAs</name>
+        <value>FALSE</value>
+   </property>
+   
+   <!-- HiveServer2 active/passive HA parameter; enabling it also speeds up HiveServer2 startup -->
+   <property>
+       <name>hive.server2.active.passive.ha.enable</name>
+       <value>true</value>
+   </property>
+   <property>
+     <name>hive.fetch.task.conversion</name>
+     <value>more</value>
+     <description>
+       Expects one of [none, minimal, more].
+       Some select queries can be converted to single FETCH task minimizing latency.
+       Currently the query should be single sourced not having any subquery and should not have
+       any aggregations or distincts (which incurs RS), lateral views and joins.
+       0. none : disable hive.fetch.task.conversion
+       1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+       2. more    : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns); simple queries skip MapReduce, which speeds them up
+     </description>
+   </property>
+   <!-- The following entries were added for Griffin -->
+     <property>
+        <name>hive.exec.local.scratchdir</name>
+        <value>/opt/modules/hive-3.1.2/temp/hive</value>
+        <description>Local scratch space for Hive jobs</description>
+      </property>
+      <property>
+        <name>hive.downloaded.resources.dir</name>
+        <value>/opt/modules/hive-3.1.2/temp/hive/${hive.session.id}_resources</value>
+        <description>Temporary local directory for added resources in the remote file system.</description>
+      </property>
+      <property>
+        <name>hive.querylog.location</name>
+        <value>/opt/modules/hive-3.1.2/temp/hive</value>
+        <description>Location of Hive run time structured log file</description>
+      </property>
+       <property>
+       <name>hive.server2.logging.operation.log.location</name>
+        <value>/opt/modules/hive-3.1.2/temp/hive/operation_logs</value>
+      </property>
+       <!-- Metastore URI to connect to -->
+      <property>
+      <name>hive.metastore.uris</name>
+       <value>thrift://127.0.0.1:9083</value>
+      </property>
+   
+       <!-- metastore event DB notification authorization -->
+      <property>
+        <name>hive.metastore.event.db.notification.api.auth</name>
+        <value>false</value>
+      </property>
+       
+        <!-- Default Hive warehouse directory on HDFS; not configured in this Huawei Cloud deployment -->
+    <!-- <property>
+       <name>hive.metastore.warehouse.dir</name>
+       <value>/user/hive/warehouse</value>
+    </property>
+   -->
+   </configuration>
+   ```
+
+   d. Start the Hive metastore
+
+   ```shell
+   # Start the Hive metastore. Note: this window is blocked once it is running, so open a new shell for further work (it can also be started in the background; see the sketch after this block)
+   cd $HIVE_HOME
+   bin/hive --service metastore  
+   ```
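+
+   A minimal sketch of the background alternative mentioned in the comment above (the logs/metastore.log path is just this guide's choice):
+
+   ```shell
+   cd $HIVE_HOME
+   mkdir -p logs
+   # run the metastore in the background and keep its output in a log file
+   nohup bin/hive --service metastore > logs/metastore.log 2>&1 &
+   ```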
+
+   e. Test whether Hive is configured correctly
+
+   ```shell
+   # connect to Hive with the local CLI
+   bin/hive
+   show databases;
+   show tables;
+   ```
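+
+   The Griffin service configured later also connects to Hive over JDBC on port 10000 (see hive.jdbc.url in application.properties), which requires HiveServer2 as configured in hive-site.xml above. A minimal sketch for starting it and checking the connection; the host and log path follow this guide's examples and are not fixed conventions:
+
+   ```shell
+   # start HiveServer2 in the background (thrift port 10000 from hive-site.xml)
+   nohup bin/hive --service hiveserver2 > logs/hiveserver2.log 2>&1 &
+   # verify the JDBC endpoint with beeline
+   bin/beeline -u jdbc:hive2://10.168.57.10:10000 -e "show databases;"
+   ```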
+
+### 2.4 Scala deployment
+
+**Note:** Spark, Livy and Griffin all have Scala version requirements, so Scala 2.11 is required here; the tarball is included in the installation folder
+
+1. Extract the Scala tarball into /opt/modules
+
+   ```shell
+   [root@master tmp]# tar -zxvf scala-2.11.6.tgz -C /opt/modules/
+   ```
+
+2. Edit the environment variable configuration file (/etc/profile) and add the following
+
+   ```shell
+   # scala
+   export SCALA_HOME=/opt/modules/scala-2.11.6
+   export PATH=$PATH:$SCALA_HOME/bin
+   ```
+
+3. Apply the configuration file
+
+   ```shell
+   source /etc/profile  
+   ```
+
+4. Check that the configuration took effect
+
+   ```shell
+   [root@master tmp]# cd $SCALA_HOME
+   # landing in the scala-2.11.6 directory means the configuration is in effect
+   [root@master scala-2.11.6]# 
+   ```
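+
+   You can also check the version directly (a minimal check):
+
+   ```shell
+   # should report something like "Scala code runner version 2.11.6"
+   scala -version
+   ```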
+
+   
+
+### 2.5 Spark (YARN deployment mode)
+
+**Note:** the files to modify and the installation package are under the spark folder
+
+1. Upload spark-2.2.1-bin-hadoop2.7.tgz to Linux, extract it, and place it in the target location.
+
+   ```shell
+   # extract the Spark package
+   tar -zxvf spark-2.2.1-bin-hadoop2.7.tgz -C /opt/modules
+   # enter the modules directory
+   cd /opt/modules
+   # rename the directory
+   mv spark-2.2.1-bin-hadoop2.7 spark-2.2.1
+   ```
+
+2. Edit conf/spark-env.sh and add the following (adjust to the actual machine)
+
+   ```shell
+   export JAVA_HOME=/usr/java/jdk1.8.0_301
+   export SCALA_HOME=/opt/modules/scala-2.11.6
+   YARN_CONF_DIR=/opt/modules/hadoop-3.1.3/etc/hadoop
+   HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+   SPARK_MASTER_HOST=master
+   SPARK_MASTER_PORT=7077
+   SPARK_MASTER_WEBUI_PORT=8082
+   SPARK_LOCAL_IP=10.168.57.10
+   SPARK_PID_DIR=/opt/modules/spark-2.2.1/pids
+   ```
+
+3. Start Hadoop HDFS (skip this if it is already running from the previous section)
+
+4. Start Spark
+
+   ```shell
+   # enter the Spark installation directory
+   cd  xxx
+   sbin/start-all.sh
+   ```
+
+   After startup, visit http://ip:8082/ ![image-20220606170004690](图片/image-20220606170004690.png)
+
+5. Test: submit an example application to verify the installation
+
+   ```shell
+   bin/spark-submit \
+   --class org.apache.spark.examples.SparkPi \
+   --master yarn \
+   --deploy-mode client \
+   ./examples/jars/spark-examples_2.11-2.2.1.jar \
+   10
+   ```
+
+   ![image-20220606173207637](图片/image-20220606173207637.png)
+
+   Check YARN: http://hadoop113:8088/
+
+   ![image-20220606173226832](图片/image-20220606173226832.png)
+
+   
+
+6. Configure the data exploration service
+
+   a. Edit conf/spark-defaults.conf
+
+   ```shell
+   spark.master                    yarn-cluster
+   spark.serializer                org.apache.spark.serializer.KryoSerializer
+   spark.yarn.jars                 hdfs:///home/spark_lib/*
+   spark.yarn.dist.files		hdfs:///home/spark_conf/hive-site.xml
+   spark.sql.broadcastTimeout  500
+   ```
+
+   b. Edit conf/spark-env.sh
+
+   ```shell
+   HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+   SPARK_MASTER_HOST=master
+   SPARK_MASTER_PORT=7077
+   SPARK_MASTER_WEBUI_PORT=8082
+   SPARK_LOCAL_IP=localhost
+   SPARK_PID_DIR=/opt/modules/spark-yarn/pids
+   ```
+
+   c. Create the directories in HDFS and upload the files
+
+   ```shell
+   # create the directories (-p also creates any missing parent directories)
+   hdfs dfs -mkdir -p /home/spark_lib
+   hdfs dfs -mkdir -p /home/spark_conf
+   # upload the files
+   hdfs dfs -put /opt/modules/spark-yarn/jars/*  hdfs:///home/spark_lib/
+   hdfs dfs -put /opt/modules/hive-3.1.2/conf/hive-site.xml hdfs:///home/spark_conf/
+   ```
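+
+   A quick check that both uploads are in place (a minimal sketch):
+
+   ```shell
+   # should list the Spark jars and hive-site.xml respectively
+   hdfs dfs -ls /home/spark_lib | head
+   hdfs dfs -ls /home/spark_conf
+   ```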
+
+   
+
+### 2.6 Livy installation
+
+1. Upload and extract the package
+
+2. After extracting, go into the conf directory, copy livy.conf.template to livy.conf, and add the following
+
+   ```shell
+   # Spark master for Livy sessions
+   
+   livy.server.host = 43.143.224.212
+   livy.spark.master = yarn
+   # Spark deploy mode for Livy sessions
+   livy.spark.deploy-mode = cluster
+   #livy.spark.deployMode = cluster
+   # use HiveContext by default
+   livy.repl.enableHiveContext = true
+   # enable user impersonation
+   livy.impersonation.enabled = true
+   # session idle timeout
+   livy.server.session.timeout = 1h
+   # Livy server port
+   livy.server.port = 8998
+   ```
+
+3. Copy livy-env.sh.template to livy-env.sh and add the following
+
+   ```shell
+   # change the paths to match your environment
+   export SPARK_HOME=/opt/modules/spark-yarn
+   export HADOOP_CONF_DIR=/opt/modules/hadoop-3.1.3/etc/hadoop
+   ```
+
+4. Create a log folder under the installation directory
+
+   ```shell
+   mkdir /xxxxx/livy/logs
+   ```
+
+5. Start Livy
+
+   ```shell
+   # enter the Livy installation directory
+   cd xxx
+   bin/livy-server start
+   
+   # stop
+   bin/livy-server stop
+   ```
+
+   After startup, visit http://ip:8998/ui
+
+   ![image-20220606180513212](图片/image-20220606180513212-165450991535315.png)
+
+6. Test the installation
+
+   a. Create a session
+
+   ```shell
+    curl -XPOST 'http://10.168.57.10:8998/sessions' -H "Content-Type:application/json" --data '{"kind":"spark"}'
+   ```
+
+   ![1646836917247](../document/学习内容+笔记/资料存放/关于项目/数据探查/数据探查安装文档/数据探查服务安装文档.assets/1646836917247.png)
+
+   Note: wait until the Livy server shows the session state as idle before sending requests; only then are they executed. While a request runs the state changes to busy, and it returns to idle once execution finishes.
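+
+   A minimal way to poll the session state from the command line (session id 0 corresponds to the session created above):
+
+   ```shell
+   # returns JSON that contains the session state, e.g. "state":"idle"
+   curl http://10.168.57.10:8998/sessions/0
+   ```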
+
+   b. Create hello.txt in the current directory and upload it to HDFS
+
+   ```shell
+   vim hello.txt
+   
+   nihao spark
+   nihao scala
+   hello livy 
+   
+   hadoop fs -mkdir /livydemo
+   hadoop fs -put ./hello.txt   /livydemo
+   ```
+
+   ![1646837302747](../document/学习内容+笔记/资料存放/关于项目/数据探查/数据探查安装文档/数据探查服务安装文档.assets/1646837302747.png)
+
+   c. Submit a job
+
+   ```shell
+   curl -XPOST 'http://10.168.57.10:8998/sessions/0/statements' -H 'Content-Type:application/json' -d '{"code":"sc.textFile(\"hdfs:///livydemo/hello.txt\").flatMap(_.split(\" \")).map((_,1)).reduceByKey(_+_).saveAsTextFile(\"hdfs:///livydemo/result/1\")"}'
+   ```
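+
+   The statement result can be fetched over the same REST API (a minimal sketch; statement id 0 assumes this is the first statement in the session):
+
+   ```shell
+   # shows the statement state (waiting/running/available) and its output
+   curl http://10.168.57.10:8998/sessions/0/statements/0
+   # once it has finished, the word counts are in HDFS
+   hdfs dfs -cat /livydemo/result/1/part-*
+   ```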
+
+   Open YARN to check the job status: http://ip:8088/
+
+   ![image-20220606180638150](图片/image-20220606180638150-165451000036617.png)
+
+### 2.7 Elasticsearch deployment
+
+1. Upload and extract the package, then distribute it to the other nodes
+
+   ```shell
+   # extract
+   tar -zxvf elasticsearch-5.0.0.tar.gz -C /opt/modules
+   # rename
+   mv elasticsearch-5.0.0 es-cluster
+   ```
+
+2. Edit the config/elasticsearch.yml file and distribute it to the other nodes
+
+   ```yaml
+   cluster.name: es-cluster    # cluster name; different names mean different clusters
+   node.name: master    # node name, user defined
+   path.data: /opt/modules/es-cluster/es/data    # data path
+   
+   path.logs: /opt/modules/es-cluster/es/logs    # log path
+   bootstrap.memory_lock: false    # do not lock memory
+   network.host: 10.168.57.10     # bind IP address; if the IP has an alias you can try it, a bare numeric IP can cause errors
+   http.port: 9200    # bind port
+   discovery.zen.ping.unicast.hosts: ["10.168.57.10", "10.168.57.11", "10.168.57.12"]    # cluster member list (array); IPs or domain names
+   discovery.zen.minimum_master_nodes: 2    # must not exceed the total number of nodes (to avoid split brain: 1 for two nodes, 2 for three)
+   http.cors.enabled: true    # enable HTTP CORS so browser-based tools can reach the nodes
+   http.cors.allow-origin: "*"    # allow all origins
+   ```
+
+3. Preparation before starting
+
+   ```shell
+   # create a dedicated es user and group to run Elasticsearch (it refuses to start as root)
+   [root@master modules]# groupadd es
+   [root@master modules]# useradd es -g es -p codingwhy
+   # make the es user the owner of the installation directory (run from /opt/modules)
+   [root@master modules]# chown -R es:es es-cluster
+   
+   ```
+
+   Two system settings must be adjusted
+
+   First, the Linux soft and hard nofile limits are too low and must be at least 65536; second, vm.max_map_count is too low and must be at least 262144
+
+   ```shell
+   vim /etc/security/limits.conf
+   *               soft    nofile           65536
+   *               hard    nofile           65536
+   vim /etc/sysctl.conf
+   # add this line
+   vm.max_map_count=262144
+   # apply with sysctl -p
+   [root@localhost es]# sysctl -p
+   ```
+
+   
+
+4. Switch to the newly created user before working in the Elasticsearch directory
+
+   ```shell
+   su es 
+   ```
+
+   
+
+5. Start Elasticsearch; every node must be started
+
+   ```shell
+   # run in the foreground (the terminal is blocked while it runs)
+   es-cluster/bin/elasticsearch
+   # run in the background (daemonized, no foreground output)
+   es-cluster/bin/elasticsearch -d
+   ```
+
+6. Visit http://<each node ip>:9200/
+
+![image-20220606182338871](图片/image-20220606182338871-165451102096019.png)
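+
+A minimal command-line check of the cluster, assuming the master node IP used throughout this guide:
+
+```shell
+# "number_of_nodes" should match the cluster size and "status" should be green or yellow
+curl 'http://10.168.57.10:9200/_cluster/health?pretty'
+```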
+
+
+
+7. Create the griffin index in Elasticsearch (run the following from the command line)
+
+   ```shell
+   curl -k -H "Content-Type: application/json" -X PUT http://10.168.57.10:9200/griffin \
+    -d '{
+       "aliases": {},
+       "mappings": {
+           "accuracy": {
+               "properties": {
+                   "name": {
+                       "fields": {
+                           "keyword": {
+                               "ignore_above": 256,
+                               "type": "keyword"
+                           }
+                       },
+                       "type": "text"
+                   },
+                   "tmst": {
+                       "type": "date"
+                   }
+               }
+           }
+       },
+       "settings": {
+           "index": {
+               "number_of_replicas": "2",
+               "number_of_shards": "5"
+           }
+       }
+   }'
+   ```
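+
+   To confirm the index was created with the expected mapping (a minimal check):
+
+   ```shell
+   # should return the griffin index settings and the accuracy mapping defined above
+   curl 'http://10.168.57.10:9200/griffin?pretty'
+   ```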
+
+   
+
+### 2.8 Griffin deployment
+
+**Note:** the folder contains two files, `measure-0.6.0.jar` and `service-0.6.0.tar.gz`. Normally only the configuration inside `service-0.6.0.tar.gz` needs to be changed, mostly path names and IPs. The files that need changing are listed below for future deployments; they all live under config, and the paths shown below are their locations in the source tree. If editing the files under config does not solve the problem, the project has to be recompiled.
+
+#### 1. Modify the configuration
+
+1. **service/src/main/resources/application.properties**
+
+   Mainly update the relevant IPs, paths, and account credentials
+
+   ```properties
+   # Apache Griffin server port (default 8080)
+   
+   server.port = 8091
+   spring.application.name=griffin_service
+   spring.datasource.url=jdbc:mysql://10.168.57.10:3306/quartz?useSSL=false
+   spring.datasource.username=root
+   spring.datasource.password=root
+   spring.jpa.generate-ddl=true
+   spring.datasource.driver-class-name=com.mysql.jdbc.Driver
+   spring.jpa.show-sql=true
+   # Hive metastore
+   hive.metastore.uris=thrift://10.168.57.10:9083
+   hive.metastore.dbname=default
+   hive.hmshandler.retry.attempts=15
+   hive.hmshandler.retry.interval=2000ms
+   #Hive jdbc
+   hive.jdbc.className=org.apache.hive.jdbc.HiveDriver
+   hive.jdbc.url=jdbc:hive2://10.168.57.10:10000/
+   hive.need.kerberos=false
+   hive.keytab.user=xxx@xx.com
+   hive.keytab.path=/path/to/keytab/file
+   # Hive cache time
+   cache.evict.hive.fixedRate.in.milliseconds=900000
+   # Kafka schema registry
+   kafka.schema.registry.url=http://localhost:8081
+   # Update job instance state at regular intervals
+   jobInstance.fixedDelay.in.milliseconds=60000
+   # Expired time of job instance which is 7 days that is 604800000 milliseconds.Time unit only supports milliseconds
+   jobInstance.expired.milliseconds=604800000
+   # schedule predicate job every 5 minutes and repeat 12 times at most
+   # interval time unit s:second m:minute h:hour d:day,only support these four units
+   predicate.job.interval=5m
+   predicate.job.repeat.count=12
+   # external properties directory location
+   external.config.location=
+   # external BATCH or STREAMING env
+   external.env.location=
+   # login strategy ("default" or "ldap")
+   login.strategy=default
+   # ldap
+   ldap.url=ldap://hostname:port
+   ldap.email=@example.com
+   ldap.searchBase=DC=org,DC=example
+   ldap.searchPattern=(sAMAccountName={0})
+   # hdfs default name
+   fs.defaultFS=
+   # elasticsearch
+   elasticsearch.host=10.168.57.10
+   elasticsearch.port=9200
+   elasticsearch.scheme=http
+   # elasticsearch.user = user
+   # elasticsearch.password = password
+   # livy
+   livy.uri=http://10.168.57.10:8998/batches
+   livy.need.queue=false
+   livy.task.max.concurrent.count=20
+   livy.task.submit.interval.second=3
+   livy.task.appId.retry.count=3
+   livy.need.kerberos=false
+   livy.server.auth.kerberos.principal=livy/kerberos.principal
+   livy.server.auth.kerberos.keytab=/path/to/livy/keytab/file
+   # yarn url
+   yarn.uri=http://10.168.57.11:8088
+   # griffin event listener
+   internal.event.listeners=GriffinJobEventHook
+   
+   logging.file=logs/griffin-service.log
+   ```
+
+2. **service/src/main/resources/quartz.properties**  
+
+   ```properties
+   org.quartz.scheduler.instanceName=spring-boot-quartz
+   org.quartz.scheduler.instanceId=AUTO
+   org.quartz.threadPool.threadCount=5
+   org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
+   # If you use postgresql as your database,set this property value to org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
+   # If you use mysql as your database,set this property value to org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+   # If you use h2 as your database, it's ok to set this property value to StdJDBCDelegate, PostgreSQLDelegate or others
+   # this is the main line to change; for MySQL use the delegate below
+   org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+   org.quartz.jobStore.useProperties=true
+   org.quartz.jobStore.misfireThreshold=60000
+   org.quartz.jobStore.tablePrefix=QRTZ_
+   org.quartz.jobStore.isClustered=true
+   org.quartz.jobStore.clusterCheckinInterval=20000
+   ```
+
+3. **service/src/main/resources/sparkProperties.json**
+
+   These are mainly Hadoop/HDFS file paths; with the layout used in this guide they normally do not need to be changed
+
+   ```json
+   {
+     "file": "hdfs:///griffin/griffin-measure.jar",
+     "className": "org.apache.griffin.measure.Application",
+     "queue": "default",
+     "numExecutors": 2,
+     "executorCores": 1,
+     "driverMemory": "1g",
+     "executorMemory": "1g",
+     "conf": {
+       "spark.yarn.dist.files": "hdfs:///home/spark_conf/hive-site.xml"
+     },
+     "files": [
+     ]
+   }
+   ```
+
+4. **service/src/main/resources/env/env_batch.json**
+
+   ```json
+   {
+     "spark": {
+       "log.level": "WARN"
+     },
+     "sinks": [
+       {
+         "name": "console",
+         "type": "CONSOLE",
+         "config": {
+           "max.log.lines": 10
+         }
+       },
+       {
+         "name": "hdfs",
+         "type": "HDFS",
+         "config": {
+           "path": "hdfs:///griffin/persist",
+           "max.persist.lines": 10000,
+           "max.lines.per.file": 10000
+         }
+       },
+       {
+         "name": "elasticsearch",
+         "type": "ELASTICSEARCH",
+         "config": {
+           "method": "post",
+           "api": "http://10.168.57.10:9200/griffin/accuracy",
+           "connection.timeout": "1m",
+           "retry": 10
+         }
+       }
+     ],
+     "griffin.checkpoint": []
+   }
+   ```
+
+5. **service/src/main/resources/env/env_streaming.json**
+
+   ```json
+   {
+     "spark": {
+       "log.level": "WARN",
+       "checkpoint.dir": "hdfs:///griffin/checkpoint/${JOB_NAME}",
+       "init.clear": true,
+       "batch.interval": "1m",
+       "process.interval": "5m",
+       "config": {
+         "spark.default.parallelism": 4,
+         "spark.task.maxFailures": 5,
+         "spark.streaming.kafkaMaxRatePerPartition": 1000,
+         "spark.streaming.concurrentJobs": 4,
+         "spark.yarn.maxAppAttempts": 5,
+         "spark.yarn.am.attemptFailuresValidityInterval": "1h",
+         "spark.yarn.max.executor.failures": 120,
+         "spark.yarn.executor.failuresValidityInterval": "1h",
+         "spark.hadoop.fs.hdfs.impl.disable.cache": true
+       }
+     },
+     "sinks": [
+       {
+         "type": "CONSOLE",
+         "config": {
+           "max.log.lines": 100
+         }
+       },
+       {
+         "type": "HDFS",
+         "config": {
+           "path": "hdfs:///griffin/persist",
+           "max.persist.lines": 10000,
+           "max.lines.per.file": 10000
+         }
+       },
+       {
+         "type": "ELASTICSEARCH",
+         "config": {
+           "method": "post",
+           "api": "http://10.168.57.10:9200/griffin/accuracy"
+         }
+       }
+     ],
+     "griffin.checkpoint": [
+       {
+         "type": "zk",
+         "config": {
+           "hosts": "zk:2181",
+           "namespace": "griffin/infocache",
+           "lock.path": "lock",
+           "mode": "persist",
+           "init.clear": true,
+           "close.clear": false
+         }
+       }
+     ]
+   }
+   ```
+
+
+
+#### 2. Deployment steps on the server
+
+1. Upload the measure jar into the griffin directory on HDFS
+
+   ```shell
+   # rename the jar
+   mv measure-0.6.0.jar griffin-measure.jar
+   # upload the jar into the /griffin directory on HDFS (create the directory first if it does not exist)
+   hdfs dfs -put griffin-measure.jar /griffin/
+   
+   # command to create the /griffin directory, if needed
+   #hdfs dfs -mkdir /griffin
+   ```
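+
+   A quick check that the jar is where sparkProperties.json expects it (hdfs:///griffin/griffin-measure.jar):
+
+   ```shell
+   hdfs dfs -ls /griffin/
+   ```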
+
+2. Extract the service package and run the service (make sure the configuration above has been updated before starting)
+
+   ```shell
+   # extract into /opt/modules, then enter the service directory
+   tar -zxvf target/service-0.6.0.tar.gz -C /opt/modules
+   cd /opt/modules/service-0.6.0
+   # start the service
+   ./bin/griffin.sh start
+   # or use ./bin/start.sh
+   # stop the service
+   ./bin/griffin.sh stop
+   # or use ./bin/stop.sh
+   ```
+
+3. Open the web UI
+
+   ```
+   http://<your IP>:<your port>
+   ```
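+
+   With the server.port set above this is port 8091 on the host running the service. If the page does not load, a minimal way to check (assuming the service runs on the master node used throughout this guide; the log file name comes from logging.file in application.properties):
+
+   ```shell
+   # the service should answer on its configured port
+   curl -I http://10.168.57.10:8091
+   # the log shows startup errors, if any
+   tail -n 50 logs/griffin-service.log
+   ```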
+
+