How to Install Hadoop 2

  How do you install Hadoop 2? This article walks through building a federated, highly available Hadoop 2.2.0 cluster: two HDFS nameservices (hadoop-cluster1 and hadoop-cluster2), each with an active/standby NameNode pair, shared edit logs on a JournalNode quorum, and automatic failover coordinated by ZooKeeper.


  I. Environment preparation: see the Hadoop installation guide, chapters one through four.
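
  The referenced guide is not reproduced here. As a rough sketch of what those chapters cover, every node needs a JDK, consistent hostname resolution, and passwordless SSH for the hadoop user; the hostnames match this article, but everything else (IP addresses included) is an assumption, not taken from the guide:

  # /etc/hosts on every node (example addresses; substitute your own)
  # 192.168.1.11 master1
  # 192.168.1.12 masterha1
  # 192.168.1.13 master2
  # 192.168.1.14 masterha2
  # 192.168.1.21 slave1
  # 192.168.1.22 slave2
  # 192.168.1.23 slave3

  # passwordless SSH from master1 to every node, as user hadoop
  ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
  ssh-copy-id hadoop@masterha1   # repeat for master2, masterha2, slave1..slave3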

  II. Pick three machines and install ZooKeeper on them; in this example the three servers are masterha1, masterha2 and master2.
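
  The article does not show the ZooKeeper configuration itself. A minimal conf/zoo.cfg for these three servers might look like the following (dataDir is an assumption; clientPort 2181 matches the ha.zookeeper.quorum value used later):

  # conf/zoo.cfg, identical on masterha1, master2 and masterha2
  tickTime=2000
  initLimit=10
  syncLimit=5
  # dataDir is an assumed location
  dataDir=/home/hadoop/zookeeper/data
  clientPort=2181
  server.1=masterha1:2888:3888
  server.2=master2:2888:3888
  server.3=masterha2:2888:3888

  Each server also needs a myid file under dataDir containing its own server number (1, 2 or 3).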

  III. Install Hadoop 2

  1. Unpack hadoop-2.2.0.tar.gz, rename the directory to hadoop2, and set the HADOOP_HOME and PATH environment variables (note that PATH needs the sbin directory as well as bin). A sketch follows.
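
  A minimal sketch; the /home/hadoop install location is implied by the etc/hadoop path used below:

  tar -xzf hadoop-2.2.0.tar.gz -C /home/hadoop
  mv /home/hadoop/hadoop-2.2.0 /home/hadoop/hadoop2

  # append to ~/.bashrc (or /etc/profile), then run: source ~/.bashrc
  export HADOOP_HOME=/home/hadoop/hadoop2
  export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin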

  2. cd ~

  Create the following directories with permissions 755 (mkdir -m 755 xxx); an equivalent one-line loop is shown after the list:

  mkdir -m 755 namedir
  mkdir -m 755 datadir
  mkdir -m 755 jndir
  mkdir -m 755 temp
  mkdir -m 755 hadoopmrsys
  mkdir -m 755 hadoopmrlocal
  mkdir -m 755 nodemanagerlocal
  mkdir -m 755 nodemanagerlogs
  mkdir -m 755 nodemanagerremote
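
  The same directories in one loop:

  for d in namedir datadir jndir temp hadoopmrsys hadoopmrlocal \
           nodemanagerlocal nodemanagerlogs nodemanagerremote; do
    mkdir -m 755 "$d"
  done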

  cd /home/hadoop/hadoop2/etc/hadoop

  Edit core-site.xml:

  <configuration>
    <property>
      <name>fs.defaultFS</name>
      <value>viewfs:///</value>
    </property>
    <property>
      <name>fs.viewfs.mounttable.default.link./tmp</name>
      <value>hdfs://hadoop-cluster1/tmp</value>
    </property>
    <property>
      <name>fs.viewfs.mounttable.default.link./tmp1</name>
      <value>hdfs://hadoop-cluster2/tmp2</value>
    </property>
  </configuration>
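
  With fs.defaultFS set to viewfs:///, unqualified paths are resolved through the mount table above, so once both clusters are up:

  hadoop fs -ls /tmp    # resolves to hdfs://hadoop-cluster1/tmp
  hadoop fs -ls /tmp1   # resolves to hdfs://hadoop-cluster2/tmp2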

  Edit hdfs-site.xml:

  <configuration>
    <property>
      <name>dfs.nameservices</name>
      <value>hadoop-cluster1,hadoop-cluster2</value>
    </property>

    <!-- cluster1 -->
    <property>
      <name>dfs.ha.namenodes.hadoop-cluster1</name>
      <value>nn1,nn2</value>
    </property>
    <property>
      <name>dfs.namenode.rpc-address.hadoop-cluster1.nn1</name>
      <value>master1:9000</value>
    </property>
    <property>
      <name>dfs.namenode.rpc-address.hadoop-cluster1.nn2</name>
      <value>masterha1:9000</value>
    </property>
    <property>
      <name>dfs.namenode.http-address.hadoop-cluster1.nn1</name>
      <value>master1:50070</value>
    </property>
    <property>
      <name>dfs.namenode.http-address.hadoop-cluster1.nn2</name>
      <value>masterha1:50070</value>
    </property>
    <property>
      <name>dfs.namenode.secondary.http-address.hadoop-cluster1.nn1</name>
      <value>master1:9001</value>
    </property>
    <property>
      <name>dfs.namenode.secondary.http-address.hadoop-cluster1.nn2</name>
      <value>masterha1:9001</value>
    </property>
    <property>
      <name>dfs.client.failover.proxy.provider.hadoop-cluster1</name>
      <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- end cluster1 -->

    <!-- cluster2 -->
    <property>
      <name>dfs.ha.namenodes.hadoop-cluster2</name>
      <value>nn3,nn4</value>
    </property>
    <property>
      <name>dfs.namenode.rpc-address.hadoop-cluster2.nn3</name>
      <value>master2:9000</value>
    </property>
    <property>
      <name>dfs.namenode.rpc-address.hadoop-cluster2.nn4</name>
      <value>masterha2:9000</value>
    </property>
    <property>
      <name>dfs.namenode.http-address.hadoop-cluster2.nn3</name>
      <value>master2:50070</value>
    </property>
    <property>
      <name>dfs.namenode.http-address.hadoop-cluster2.nn4</name>
      <value>masterha2:50070</value>
    </property>
    <property>
      <name>dfs.namenode.secondary.http-address.hadoop-cluster2.nn3</name>
      <value>master2:9001</value>
    </property>
    <property>
      <name>dfs.namenode.secondary.http-address.hadoop-cluster2.nn4</name>
      <value>masterha2:9001</value>
    </property>
    <property>
      <name>dfs.client.failover.proxy.provider.hadoop-cluster2</name>
      <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- end cluster2 -->

    <property>
      <name>dfs.namenode.name.dir</name>
      <value>/home/hadoop/namedir</value>
    </property>
    <property>
      <name>dfs.namenode.shared.edits.dir.hadoop-cluster1.nn1</name>
      <value>qjournal://masterha1:8485;master2:8485;masterha2:8485/cluster1</value>
    </property>
    <property>
      <name>dfs.namenode.shared.edits.dir.hadoop-cluster1.nn2</name>
      <value>qjournal://masterha1:8485;master2:8485;masterha2:8485/cluster1</value>
    </property>
    <property>
      <name>dfs.namenode.shared.edits.dir.hadoop-cluster2.nn3</name>
      <value>qjournal://masterha1:8485;master2:8485;masterha2:8485/cluster2</value>
    </property>
    <property>
      <name>dfs.namenode.shared.edits.dir.hadoop-cluster2.nn4</name>
      <value>qjournal://masterha1:8485;master2:8485;masterha2:8485/cluster2</value>
    </property>
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>/home/hadoop/datadir</value>
    </property>

    <!-- ha.zookeeper.* (and the hadoop.* keys further down) conventionally
         belong in core-site.xml; the HDFS daemons also pick them up here -->
    <property>
      <name>ha.zookeeper.quorum</name>
      <value>masterha1:2181,master2:2181,masterha2:2181</value>
    </property>
    <property>
      <name>ha.zookeeper.session-timeout.ms</name>
      <value>5000</value>
    </property>
    <property>
      <name>dfs.ha.automatic-failover.enabled</name>
      <value>true</value>
    </property>
    <property>
      <name>dfs.journalnode.edits.dir</name>
      <value>/home/hadoop/jndir</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>2</value>
    </property>
    <property>
      <name>dfs.permissions.enabled</name>
      <value>false</value>
    </property>
    <property>
      <name>dfs.webhdfs.enabled</name>
      <value>true</value>
    </property>
    <property>
      <name>dfs.support.append</name>
      <value>true</value>
    </property>
    <property>
      <name>hadoop.tmp.dir</name>
      <value>/home/hadoop/temp</value>
    </property>
    <property>
      <name>hadoop.proxyuser.hadoop.hosts</name>
      <value>*</value>
    </property>
    <property>
      <name>hadoop.proxyuser.hadoop.groups</name>
      <value>*</value>
    </property>
    <property>
      <name>dfs.ha.fencing.methods</name>
      <value>sshfence</value>
    </property>
    <property>
      <name>dfs.ha.fencing.ssh.private-key-files</name>
      <value>/home/hadoop/.ssh/id_rsa</value>
    </property>
  </configuration>
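
  After saving, the federation and HA keys can be sanity-checked with the stock getconf tool:

  $bin/hdfs getconf -confKey dfs.nameservices                  # hadoop-cluster1,hadoop-cluster2
  $bin/hdfs getconf -confKey dfs.ha.namenodes.hadoop-cluster1  # nn1,nn2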

  cp mapred-site.xml.template mapred-site.xml

  Edit mapred-site.xml:

  <configuration>
    <property>
      <name>mapreduce.framework.name</name>
      <value>yarn</value>
    </property>
    <property>
      <name>mapreduce.job.tracker</name>
      <value>master1:54311</value>
    </property>
    <property>
      <name>mapreduce.jobhistory.address</name>
      <value>master1:10020</value>
    </property>
    <property>
      <name>mapreduce.jobhistory.webapp.address</name>
      <value>master1:19888</value>
    </property>
    <property>
      <name>mapred.system.dir</name>
      <value>/home/hadoop/hadoopmrsys</value>
      <final>true</final>
    </property>
    <property>
      <name>mapred.local.dir</name>
      <value>/home/hadoop/hadoopmrlocal</value>
      <final>true</final>
    </property>
  </configuration>
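
  The jobhistory addresses above only take effect once the JobHistory server daemon is running; the article does not start it, so as a sketch (typically after YARN is up in section VI):

  # on master1
  $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver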

  Edit yarn-site.xml:

  <configuration>
    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
      <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
      <name>yarn.nodemanager.local-dirs</name>
      <value>/home/hadoop/nodemanagerlocal</value>
    </property>
    <property>
      <name>yarn.nodemanager.log-dirs</name>
      <value>/home/hadoop/nodemanagerlogs</value>
    </property>
    <property>
      <name>yarn.nodemanager.remote-app-log-dir</name>
      <value>/home/hadoop/nodemanagerremote</value>
    </property>
    <property>
      <name>yarn.resourcemanager.address</name>
      <value>master1:18032</value>
    </property>
    <property>
      <name>yarn.resourcemanager.scheduler.address</name>
      <value>master1:18030</value>
    </property>
    <property>
      <name>yarn.resourcemanager.resource-tracker.address</name>
      <value>master1:18031</value>
    </property>
    <property>
      <name>yarn.resourcemanager.admin.address</name>
      <value>master1:18033</value>
    </property>
    <property>
      <name>yarn.resourcemanager.webapp.address</name>
      <value>master1:18088</value>
    </property>
  </configuration>

  Edit slaves:

  slave1

  slave2

  slave3

  Edit hadoop-env.sh:

  export JAVA_HOME=/opt/jdk1.6.0_32

  Edit yarn-env.sh:

  export JAVA_HOME=/opt/jdk1.6.0_32

  IV. Distribute the configured Hadoop to the remaining servers.
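
  A minimal way to do this, assuming the same /home/hadoop layout on every node (the host list matches this article):

  for h in masterha1 master2 masterha2 slave1 slave2 slave3; do
    scp -r /home/hadoop/hadoop2 hadoop@$h:/home/hadoop/
  done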

  V. Start HDFS

  Start ZooKeeper on the masterha1, master2 and masterha2 nodes.
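
  ZooKeeper ships zkServer.sh for this; run it on each of the three nodes:

  zkServer.sh start
  zkServer.sh status   # one node should report leader, the others follower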

  On the master1 and master2 nodes, run:

  ./bin/hdfs zkfc -formatZK

  On the masterha1, master2 and masterha2 nodes, run:

  $sbin/hadoop-daemon.sh start journalnode

  On the master1 node, run:

  $bin/hdfs namenode -format -clusterId hadoop-cluster-new

  scp -r namedir hadoop@masterha1:~

  $sbin/hadoop-daemon.sh start namenode

  On the masterha1 node, run:

  $bin/hdfs namenode -bootstrapStandby

  $sbin/hadoop-daemon.sh start namenode

  On the master1 and masterha1 nodes, run (this makes master1 the active NameNode):

  ./sbin/hadoop-daemon.sh start zkfc

  // Manual alternative (run on master1) if automatic failover was not enabled above: $bin/hdfs haadmin -ns hadoop-cluster1 -transitionToActive nn1
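
  Either way, the current state of each NameNode can be confirmed with haadmin:

  $bin/hdfs haadmin -ns hadoop-cluster1 -getServiceState nn1   # expect: active
  $bin/hdfs haadmin -ns hadoop-cluster1 -getServiceState nn2   # expect: standby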

  On the master2 node, run:

  $bin/hdfs namenode -format -clusterId hadoop-cluster-new

  scp -r namedir hadoop@masterha2:~

  $sbin/hadoop-daemon.sh start namenode

  On the masterha2 node, run:

  $bin/hdfs namenode -bootstrapStandby

  $sbin/hadoop-daemon.sh start namenode

  On the master2 and masterha2 nodes, run (this makes master2 the active NameNode):

  ./sbin/hadoop-daemon.sh start zkfc

  // Manual alternative: $bin/hdfs haadmin -ns hadoop-cluster2 -transitionToActive nn3

  On the slave1, slave2 and slave3 nodes, run (starts the DataNodes):

  $sbin/hadoop-daemon.sh start datanode

  Verify HDFS:

  http://master1:50070

  hadoop fs -mkdir hdfs://hadoop-cluster1/tmp

  hadoop fs -mkdir hdfs://hadoop-cluster2/tmp2

  hadoop fs -ls /

  // verify with hadoop shell commands

  // note: with two nameservices, paths must be fully qualified, e.g. hadoop fs -mkdir hdfs://hadoop-cluster1/aaa

  //hadoop fs -ls hdfs://hadoop-cluster1

  VI. Start MapReduce and YARN

  On the master1 node, run:

  start-yarn.sh

  Verify YARN:

  http://master1:18088

  hadoop jar hadoop-mapreduce-examples-2.2.0.jar randomwriter hdfs://hadoop-cluster1/outputdir
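
  If the jar is not in the current directory, it ships with the 2.2.0 distribution under share/hadoop/mapreduce:

  cd $HADOOP_HOME/share/hadoop/mapreduce
  hadoop jar hadoop-mapreduce-examples-2.2.0.jar randomwriter hdfs://hadoop-cluster1/outputdir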

  VII. Shut down the services

  On the master1 node, run:

  stop-yarn.sh

  On the slave1, slave2 and slave3 nodes, run:

  $sbin/hadoop-daemon.sh stop datanode

  On master1, masterha1, master2 and masterha2, run:

  $sbin/hadoop-daemon.sh stop namenode

  On masterha1, master2 and masterha2, run:

  $sbin/hadoop-daemon.sh stop journalnode

  On master1, masterha1, master2 and masterha2 (every node where zkfc was started), run:

  $sbin/hadoop-daemon.sh stop zkfc

  On masterha1, master2 and masterha2, run:

  zkServer.sh stop