FROM centos:centos7.7.1908
LABEL maintainer="blue"
LABEL name="Hadoop"
RUN yum -y install openssh-server openssh-clients sudo vim net-tools expect
RUN groupadd -g 1124 hadoop && useradd -m -u 1124 -g hadoop -d /home/hadoop hadoop
RUN echo "hadoop:hadoop" | chpasswd
RUN echo "root:root" | chpasswd
RUN echo "hadoop ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
#Generate the sshd host key files
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N ""
RUN ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N ""
RUN ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N ""
RUN ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N "" && \
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#Create the module and software directories (ownership is adjusted below)
RUN mkdir /opt/software && mkdir /opt/moudle
#Copy files from the host into the image (ADD automatically extracts tar archives)
ADD hadoop-3.2.1.tar.gz /opt/software
ADD jdk-8u212-linux-x64.tar.gz /opt/moudle
RUN mv /opt/software/hadoop-3.2.1 /opt/software/hadoop
RUN mv /opt/moudle/jdk1.8.0_212 /opt/moudle/jdk
ADD spark-3.0.0-preview2-bin-hadoop3.2.tar.gz /opt/software
RUN mv /opt/software/spark-3.0.0-preview2-bin-hadoop3.2 /opt/software/spark
RUN chown -R hadoop:hadoop /opt/moudle && chown -R hadoop:hadoop /opt/software
COPY CopyID /opt/moudle
RUN chmod +x /opt/moudle/CopyID
#Set environment variables
ENV CENTOS_DEFAULT_HOME=/opt/software/hadoop
ENV JAVA_HOME=/opt/moudle/jdk
ENV HADOOP_HOME=/opt/software/hadoop
ENV JRE_HOME=${JAVA_HOME}/jre
ENV CLASSPATH=${JAVA_HOME}/lib:${JRE_HOME}/lib
ENV HADOOP_CONF_DIR=/opt/software/hadoop/etc/hadoop
ENV SPARK_HOME=/opt/software/spark
ENV PATH=${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${SPARK_HOME}/bin:$PATH
#Default working directory when logging into the container
WORKDIR $CENTOS_DEFAULT_HOME
#Start the sshd service and expose port 22
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
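A minimal sketch of building the image (the tag hadoop-spark:centos7 is an assumption; the two tarballs, the Spark tarball and the CopyID script are assumed to sit next to the Dockerfile):
# Run from the directory containing the Dockerfile and the archives
docker build -t hadoop-spark:centos7 .
# docker images then shows the image ID that goes into the IMAGES variable of the start script below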
For convenience, the Hadoop and Spark distributions added to the image are already configured; the configuration files are listed below.
1. Configure core-site.xml
$ vim /opt/software/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/software/hadoop/tmp</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
</configuration>
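Once the cluster is running, the effective setting can be sanity-checked from inside the master container (hdfs getconf reads the loaded configuration):
hdfs getconf -confKey fs.defaultFS    # expected: hdfs://master:9000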
2. Configure hdfs-site.xml
$ vim /opt/software/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/software/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/software/hadoop/hdfs/data</value>
</property>
</configuration>
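The replication factor can be verified the same way once HDFS is up:
hdfs getconf -confKey dfs.replication    # expected: 3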
3. Configure mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
</configuration>
4. Configure yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME</value>
</property>
</configuration>
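After the cluster has been started (see below), whether both NodeManagers registered with the ResourceManager can be checked with:
yarn node -list    # should list slaveOne and slaveTwo in RUNNING state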
5. Configure workers
slaveOne
slaveTwo
6. Configure hadoop-env.sh
#Add the following line
export JAVA_HOME=/opt/moudle/jdk
Spark configuration: spark-env.sh
export SPARK_DIST_CLASSPATH=$(/opt/software/hadoop/bin/hadoop classpath)
export JAVA_HOME=/opt/moudle/jdk
export SPARK_MASTER_IP=172.20.0.2
export HADOOP_HOME=/opt/software/hadoop
export HADOOP_CONF_DIR=/opt/software/hadoop/etc/hadoop
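Note that SPARK_MASTER_IP has been deprecated since Spark 2.0 in favor of SPARK_MASTER_HOST; the old name still works but prints a warning. The equivalent line would be:
export SPARK_MASTER_HOST=172.20.0.2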
Spark configuration: slaves
slaveOne
slaveTwo
start-hadoop-docker.sh:
#!/bin/bash
MASTER=master
SLAVEONE=slaveOne
SLAVETWO=slaveTwo
IMAGES=4a3ac56328f3
arr=($MASTER $SLAVEONE $SLAVETWO)
NETWORK=hadoop
USER=hadoop
STARTNUMOFNODE=3
para=$1
startdocker() {
for (( i=0;i<$STARTNUMOFNODE;i++ ))
do
if [ $i -eq 0 ];then
docker run -it --name ${arr[$i]} -d --net $NETWORK --ip 172.20.0.2 -P -p 9000:9000 -p 9870:9870 -p 8080:8080 --hostname ${arr[$i]} --add-host slaveOne:172.20.0.3 --add-host slaveTwo:172.20.0.4 --privileged=true $IMAGES
else
docker run -it --name ${arr[$i]} -d --net $NETWORK --ip 172.20.0.$[$i+2] -P --hostname ${arr[$i]} --add-host master:172.20.0.2 --add-host slaveOne:172.20.0.3 --add-host slaveTwo:172.20.0.4 --privileged=true $IMAGES
fi
done
echo "正在转发密钥"
docker exec --user $USER $MASTER /bin/bash -c "/usr/bin/ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N ''"
docker exec --user $USER $MASTER /bin/bash -c "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys"
echo "正在转发密钥"
docker exec --user $USER $MASTER /bin/bash -c "/opt/moudle/CopyID hadoop hadoop $SLAVEONE"
docker exec --user $USER $MASTER /bin/bash -c "/opt/moudle/CopyID hadoop hadoop $SLAVETWO"
echo "容器启动完毕!"
echo "容器信息:"
docker ps -a
}
stopdocker() {
docker stop $(docker ps -qa)
docker rm $(docker ps -qa)
}
list() {
echo "容器信息:"
docker ps -a
}
ready() {
if [ "$para" = "start" ]
then
startdocker
elif [ "$para" = "list" ]
then
list
elif [ "$para" = "stop" ]
then
stopdocker
elif [ "$para" = "startHdfs" ]
then
startHdfs
elif [ "$para" = "stopHdfs" ]
then
stopHdfs
else
echo "$para is not found"
fi
}
ready
Note: you must change the IMAGES variable to your own image ID.
Before starting, you need to create the custom hadoop network; if you are unsure how, run the following command:
docker network create --subnet=172.20.0.0/16 hadoop
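To confirm the network was created with the expected subnet:
docker network inspect hadoop --format '{{(index .IPAM.Config 0).Subnet}}'    # expected: 172.20.0.0/16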
Start the three containers:
sh start-hadoop-docker.sh start
Use the stop argument to stop and remove all containers (note that the script stops and removes every container on the host):
sh start-hadoop-docker.sh stop
Use the list argument to list container info:
sh start-hadoop-docker.sh list
After the containers are started:
Enter the master container:
docker exec -it --user hadoop master /bin/bash
Format the NameNode:
hdfs namenode -format
Start the Hadoop cluster:
start-all.sh
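A quick smoke test after starting Hadoop (run inside the master container; the example jar name matches the Hadoop 3.2.1 shipped in the image):
jps    # master should typically show NameNode, SecondaryNameNode and ResourceManager
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar pi 2 10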
Start the Spark cluster:
cd /opt/software/spark
sbin/start-all.sh
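To verify the standalone Spark cluster, the bundled SparkPi example can be submitted against the master (spark://master:7077 is the default standalone master URL for this setup):
bin/run-example --master spark://master:7077 SparkPi 10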
Stop the Hadoop cluster:
stop-all.sh
Stop the Spark cluster:
cd /opt/software/spark
sbin/stop-all.sh
CopyID:
#!/usr/bin/expect
set timeout 10
set username [lindex $argv 0]
set password [lindex $argv 1]
set hostname [lindex $argv 2]
spawn ssh-copy-id -i $username@$hostname
expect "yes/no"
send "yes\r"
expect "password:"
send "$password\r"
expect eof
This little script just copies the public key over to enable passwordless login; after trying many approaches, expect turned out to be the most convenient, damn it!
Note that an expect script cannot read its arguments as $0, $1, …; you have to use set username [lindex $argv 0] instead.
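Run on its own, the argument order is username, password, hostname, matching how the start script calls it, e.g.:
/opt/moudle/CopyID hadoop hadoop slaveOne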
By default, host ports 9000, 9870, and 8080 are mapped to the master container.
Visit localhost:9870 to manage the Hadoop cluster.
Visit localhost:8080 to manage Spark.
Other ports are not mapped; they can be reached directly at 172.20.0.2:port.
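For example, the YARN ResourceManager web UI (default port 8088) is not mapped, but on a Linux host it can be reached via the container IP:
curl -sI http://172.20.0.2:8088 | head -n 1    # should return an HTTP status line once YARN is running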