Linux developed another virtualization technology: Linux containers. Instead of emulating a complete operating system, a Linux container isolates processes; in effect it wraps a protective layer around an ordinary process. To the process inside the container, every resource it touches is virtualized, which isolates it from the underlying system. Because containers work at the process level, they use fewer resources, take up less space, and start faster than virtual machines.
Docker is a wrapper around Linux containers: a tool for creating containers and an engine for running containerized applications, with a simple, easy-to-use interface.
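For example, a single command is enough to pull an image and open a shell inside a container (centos:7, the base image used throughout this post, serves as the illustration):

# Pull centos:7 if needed, start a container, and drop into an interactive shell;
# --rm removes the container once the shell exits
$ docker run -it --rm centos:7 /bin/bash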
Create a custom (bridge) network, so that containers on it can reach each other not only by IP but also by container name used as a hostname:
# Create the bigdata network
$ docker network create bigdata

# List networks
$ docker network ls
NETWORK ID     NAME      DRIVER    SCOPE
8dc59d0036d7   bigdata   bridge    local
395484b151bd   bridge    bridge    local
9ed6a8494749   host      host      local
ad68f2065635   none      null      local
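A quick way to confirm name resolution on the new network (the container names n1 and n2 are throwaway placeholders, not part of the setup that follows):

$ docker run -d --name n1 --network bigdata centos:7 sleep infinity
$ docker run -d --name n2 --network bigdata centos:7 sleep infinity
# getent resolves n2 through Docker's embedded DNS on the user-defined bridge
$ docker exec n1 getent hosts n2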
FROM centos:7
LABEL author="sannaha@sannaha.moe"
LABEL description="sannaha bigdata base docker image"

# Set the working directory
WORKDIR /opt/bigdata

# Refresh the package sources before installing anything
RUN yum -y update

# Docker builds are non-interactive, so -y is required to skip the confirmation prompts
RUN yum -y install java-1.8.0-openjdk-devel
RUN yum -y install openssh-clients openssh-server net-tools inetutils-ping
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" &&\
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N "" &&\
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N ""
RUN yum -y install which vim-common vim-enhanced
RUN mkdir /var/run/sshd
# Set the root login password
RUN echo 'root:root' | chpasswd

# Use sed to patch the config files with regular expressions
# Make sure root can log in over ssh
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/^#PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
# Enable public key authentication
RUN sed -ri 's/^PubkeyAuthentication\s+.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config
RUN sed -ri 's/^#PubkeyAuthentication\s+.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config
RUN sed -ri 's/^#AuthorizedKeysFile/AuthorizedKeysFile/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
RUN sed -ri 's/^StrictHostKeyChecking\s+.*/StrictHostKeyChecking no/' /etc/ssh/ssh_config
RUN sed -ri 's/^#.*StrictHostKeyChecking\s+.*/StrictHostKeyChecking no/' /etc/ssh/ssh_config
# Disable password login (key-based login only)
RUN sed -ri 's/^PasswordAuthentication\s+.*/PasswordAuthentication no/' /etc/ssh/sshd_config
RUN sed -ri 's/^#PasswordAuthentication\s+.*/PasswordAuthentication no/' /etc/ssh/sshd_config
# SSH login fix. Otherwise user is kicked off after login
RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
# Set the container timezone to Asia/Shanghai
RUN cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
# Copy the startup script and make it executable
COPY run.sh run.sh
RUN chmod +x run.sh

WORKDIR /root
# Unpack pre-generated SSH keys (prepared on the host; see the sketch below)
# and give them the permissions sshd requires
ADD .ssh.tar /root/
RUN chown root:root /root/.ssh
RUN chmod 700 /root/.ssh
# Move mutable config out to /conf and symlink it back, so it can be
# maintained (or mounted) from outside the image
RUN mkdir -p /conf/hadoop /opt/bigdata/flume/task /conf/flume
RUN mv /opt/bigdata/hadoop/etc/hadoop/slaves /conf/hadoop
RUN ln -s /conf/hadoop/slaves /opt/bigdata/hadoop/etc/hadoop/slaves
RUN mv /opt/bigdata/flume/task /conf/flume
RUN ln -s /conf/flume/task /opt/bigdata/flume/task
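The .ssh.tar added above has to exist in the build context beforehand. A minimal sketch of how it might be prepared on the host (the rsa key type and reusing the public key as authorized_keys are assumptions, not taken from the original post):

# Generate a key pair and authorize it for itself, so containers
# built from this image can ssh into one another as root
$ mkdir .ssh && ssh-keygen -t rsa -N "" -f .ssh/id_rsa
$ cp .ssh/id_rsa.pub .ssh/authorized_keys
$ tar -cf .ssh.tar .ssh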
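Assuming the snippets above come together in one Dockerfile for the base image, the build would look roughly like this; the tag matches the bigdata base-1.0 entry that shows up later in docker images:

# Build the base image from the Dockerfile in the current directory
$ docker build -t bigdata:base-1.0 .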
# The java implementation to use.
# Check that this JDK path matches the one installed in the base image,
# otherwise Hadoop will fail to start
# export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.292.b10-1.el7_9.x86_64/jre
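# Tip (not part of the stock file): run `readlink -f /usr/bin/java` inside the
# container to see the real JDK path, then drop the trailing /bin/java to get
# the value for JAVA_HOME above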
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
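After rebuilding the image, a quick sanity check that the container picks up this JAVA_HOME (the hadoopserver name and the /opt/bigdata/hadoop path come from the surrounding sections):

# Should print the Hadoop version without complaining about JAVA_HOME
$ docker exec hadoopserver /opt/bigdata/hadoop/bin/hadoop version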
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# If this file is placed at FLUME_CONF_DIR/flume-env.sh, it will be sourced
# during Flume startup.
# Environment variables can be set here.
# Check that this JDK path matches the one installed in the base image,
# otherwise Flume will fail to start
# export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.292.b10-1.el7_9.x86_64/jre
# Give Flume more memory and pre-allocate, enable remote monitoring via JMX
# export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"

# Let Flume write raw event data and configuration information to its log files for debugging
# purposes. Enabling these flags is not recommended in production,
# as it may result in logging sensitive user information or encryption secrets.
# export JAVA_OPTS="$JAVA_OPTS -Dorg.apache.flume.log.rawdata=true -Dorg.apache.flume.log.printconfig=true "

# Note that the Flume conf directory is always included in the classpath.
#FLUME_CLASSPATH=""
Other scripts
runFlume
#!/bin/sh
# Start the Flume agent in the background if its task config is present
if [ -f /opt/bigdata/flume/task/kafka-flume-log-handle.conf ]; then
  cd /opt/bigdata/flume/task
  nohup /opt/bigdata/flume/bin/flume-ng agent -n a1 -c conf -f kafka-flume-log-handle.conf -Dflume.root.logger=INFO,console &
fi
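Two quick ways to check on the agent once the container is up; when the script is launched from an interactive shell, nohup drops the console output into nohup.out in the task directory, since the script cd's there first:

# Is the agent process running? ([f]lume keeps grep from matching itself)
$ ps -ef | grep [f]lume
# Follow the agent's console output
$ tail -f /opt/bigdata/flume/task/nohup.out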
# List images
$ docker images
REPOSITORY   TAG        IMAGE ID       CREATED        SIZE
hadoop       1.0        994c7bf60f99   18 hours ago   2.75GB
bigdata      base-1.0   718ece27b5ae   20 hours ago   1.23GB
mysql        5.7        09361feeb475   2 weeks ago    447MB
centos       7          8652b9f0cb4c   7 months ago   204MB
# List all containers
$ docker ps -a
CONTAINER ID   IMAGE              COMMAND                  CREATED        STATUS                      PORTS                                                                                                                                  NAMES
05d764353843   hadoop:1.0         "/bin/sh -c /opt/big…"   18 hours ago   Up 25 seconds               0.0.0.0:8020->8020/tcp, :::8020->8020/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp, 0.0.0.0:50070->50070/tcp, :::50070->50070/tcp   hadoopserver
3110f7f459c6   mysql:5.7          "docker-entrypoint.s…"   19 hours ago   Exited (0) 17 hours ago                                                                                                                                            mysql
2ad4d3248e8c   bigdata:base-1.0   "/bin/sh -c /opt/big…"   20 hours ago   Exited (137) 18 hours ago                                                                                                                                          base
# List running containers
$ docker ps
CONTAINER ID   IMAGE        COMMAND                  CREATED        STATUS         PORTS                                                                                                                                  NAMES
05d764353843   hadoop:1.0   "/bin/sh -c /opt/big…"   18 hours ago   Up 2 seconds   0.0.0.0:8020->8020/tcp, :::8020->8020/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp, 0.0.0.0:50070->50070/tcp, :::50070->50070/tcp   hadoopserver
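Stop the container and the docker daemon before editing the files below; dockerd rewrites these JSON files when it shuts down, so edits made while it is still running are lost:

$ docker stop hadoopserver
$ systemctl stop docker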
# Go to the container's directory
$ cd /var/lib/docker/containers/05d764353843e860b3cbb2f80c9197e1a2a73fea60f14320c1639e7da56bbe93/

# Edit hostconfig.json and add the new port binding under PortBindings
$ vim hostconfig.json

# Edit config.v2.json and add the new exposed port under ExposedPorts
$ vim config.v2.json
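For reference, the additions look roughly like this (port 10000 is the one added in this example, matching the docker ps output below; the existing entries stay as they are):

In hostconfig.json, inside "PortBindings":
    "10000/tcp": [{"HostIp": "", "HostPort": "10000"}]

In config.v2.json, inside "ExposedPorts":
    "10000/tcp": {}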
# Start the docker service again
$ systemctl start docker

# Start the container and confirm the new port binding is in place
$ docker start hadoopserver && docker ps -f name=hadoopserver
CONTAINER ID   IMAGE        COMMAND                  CREATED      STATUS                  PORTS                                                                                                                                                                                           NAMES
05d764353843   hadoop:1.0   "/bin/sh -c /opt/big…"   4 days ago   Up Less than a second   0.0.0.0:8020->8020/tcp, :::8020->8020/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp, 0.0.0.0:10000->10000/tcp, :::10000->10000/tcp, 0.0.0.0:50070->50070/tcp, :::50070->50070/tcp   hadoopserver