steps
- install ubuntu 15.04 desktop on 5 nodes
- setup ssh
- setup hadoop
- config and start hadoop
- setup zookeeper
- setup hbase
- setup sqoop
- setup spark
- fixed
1. setup ubuntu
-
download and install ubuntu desktop on 5 nodes
-
setup static ip address for 5 nodes
-
node5
$ sudo pico /etc/network/interfaces

# interfaces(5) file used by ifup(8) and ifdown(8)
auto lo
iface lo inet loopback

auto eth0
iface eth0 inet static
    address 192.168.120.155
    netmask 255.255.255.0
    network 192.168.120.0
    broadcast 192.168.120.255
    gateway 192.168.120.1
    dns-nameservers 192.168.10.220 192.168.10.221

$ sudo pico /etc/resolv.conf

# Dynamic resolv.conf(5) file for glibc resolver(3) generated by resolvconf(8)
#     DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN
nameserver 192.168.10.220
nameserver 192.168.10.221

$ sudo pico /etc/hosts

127.0.0.1       localhost
127.0.1.1       node5

# The following lines are desirable for IPv6 capable hosts
::1     ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters

192.168.120.155 node5
192.168.120.154 node4
192.168.120.153 node3
192.168.120.152 node2
192.168.120.151 node1

$ ping www.baidu.com
-
node1 node2 node3 node4
# same as node5
-
mac os x yosemite
$ pico /private/etc/hosts

##
# Host Database
#
# localhost is used to configure the loopback interface
# when the system is booting.  Do not change this entry.
##
127.0.0.1       localhost
255.255.255.255 broadcasthost
::1             localhost

192.168.120.155 node5
192.168.120.154 node4
192.168.120.153 node3
192.168.120.152 node2
192.168.120.151 node1
-
change apt sources
$ sudo sed 's@cn.archive.ubuntu.com@mirrors.163.com@' -i /etc/apt/sources.list
$ sudo sed 's@security.ubuntu.com@mirrors.163.com@' -i /etc/apt/sources.list
-
set HADOOP_HOME JAVA_HOME
$ pico ~/.bashrc
...
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre
export HADOOP_HOME=~/node5/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export PATH=$PATH:$JAVA_HOME/bin
...
$ source ~/.bashrc
-
firewall
-
check status of firewall
$ sudo ufw status
-
disable firewall
$ sudo ufw disable
-
reload / restart firewall
$ sudo ufw reload
-
get ipv4 iptables status
$ sudo iptables -L -n -v
-
get ipv6 ip6tables status
$ sudo ip6tables -L -n -v
-
add user
$ sudo addgroup hadoop
$ sudo adduser --ingroup hadoop hduser
$ sudo chown -R hduser:hadoop /opt/bigdata
-
for sublime-text-2:
$ sudo add-apt-repository ppa:webupd8team/sublime-text-2
$ sudo apt-get update
$ sudo apt-get install sublime-text
-
for sublime-text-3:
$ sudo add-apt-repository ppa:webupd8team/sublime-text-3
$ sudo apt-get update
$ sudo apt-get install sublime-text-installer
-
install java on five nodes
$ sudo apt-get install openjdk-8-jdk   # matches JAVA_HOME (java-8-openjdk-amd64) used throughout
2. setup ssh
-
install openssh-server
-
node1 to node5
$ sudo apt-get install openssh-server
-
generate ssh key
-
node5
$ su - hduser
$ ssh-keygen -t rsa -P ""
$ cd ~/.ssh
$ cat id_rsa.pub >> authorized_keys
-
setup passphraseless ssh
$ ssh-copy-id -i id_rsa.pub hduser@node1
$ ssh-copy-id -i id_rsa.pub hduser@node2
$ ssh-copy-id -i id_rsa.pub hduser@node3
$ ssh-copy-id -i id_rsa.pub hduser@node4
-
check ssh login
$ ssh hduser@node1
...
$ exit
$ ssh hduser@node2
...
$ exit
$ ssh hduser@node3
...
$ exit
$ ssh hduser@node4
...
$ exit
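The four logins above can also be smoke-tested in one pass (a minimal sketch; assumes the hostnames and hduser account from this guide):
$ for h in node1 node2 node3 node4; do ssh hduser@$h hostname; done
# should print node1..node4 with no password prompts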
-
$ rm -f ~/.ssh/known_hosts
or
$ ssh-keygen -R node1   # -R takes a hostname, removing that host's cached key
3. setup hadoop
-
topology
                      node5
                 +--------------+
                 |              |
                 |   NameNode   |
                 |              |
                 +--------------+
                  /      |      \
                 /       |       \
                /        |        \
     node2             node3             node4
+--------------+  +--------------+  +--------------+
|              |  |              |  |              |
|   DataNode   |  |   DataNode   |  |   DataNode   |
|              |  |              |  |              |
+--------------+  +--------------+  +--------------+
-
setup
-
copy software to node5
$ scp hadoop-2.7.0.tar.gz hduser@node5:~/
-
mkdir to install hadoop
$ ssh hduser@node5
$ sudo mkdir /opt/bigdata
$ sudo chown -R hduser:hadoop /opt/bigdata
$ exit
...
# node4 node3 node2
...
$ ssh hduser@node1
$ sudo mkdir /opt/bigdata
$ sudo chown -R hduser:hadoop /opt/bigdata
$ exit
-
unzip, copy configs, and set .bashrc
$ tar -zxvf hadoop-2.7.0.tar.gz
$ mv hadoop-2.7.0 /opt/bigdata/hadoop
$ scp core-site.xml hdfs-site.xml mapred-site.xml slaves yarn-site.xml \
    hduser@node5:/opt/bigdata/hadoop/etc/hadoop
$ pico .bashrc
...
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre
export HADOOP_PREFIX="/opt/bigdata/hadoop"
export HADOOP_HOME=$HADOOP_PREFIX
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
export HADOOP_YARN_HOME=$HADOOP_PREFIX
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export PATH=$PATH:$JAVA_HOME/bin
...
-
sync folder
$ scp .bashrc hduser@node4:~/
$ scp .bashrc hduser@node3:~/
$ scp .bashrc hduser@node2:~/
$ scp .bashrc hduser@node1:~/
$ scp -r /opt/bigdata/hadoop hduser@node4:/opt/bigdata/hadoop
$ scp -r /opt/bigdata/hadoop hduser@node3:/opt/bigdata/hadoop
$ scp -r /opt/bigdata/hadoop hduser@node2:/opt/bigdata/hadoop
$ scp -r /opt/bigdata/hadoop hduser@node1:/opt/bigdata/hadoop
$ scp -r /opt/bigdata/hadoop/etc/hadoop hduser@node4:/opt/bigdata/hadoop/etc
$ scp -r /opt/bigdata/hadoop/etc/hadoop hduser@node3:/opt/bigdata/hadoop/etc
$ scp -r /opt/bigdata/hadoop/etc/hadoop hduser@node2:/opt/bigdata/hadoop/etc
$ scp -r /opt/bigdata/hadoop/etc/hadoop hduser@node1:/opt/bigdata/hadoop/etc
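The repeated scp lines can be collapsed into a loop (a sketch under the same paths, run from node5; assumes /opt/bigdata already exists on every node):
$ for h in node1 node2 node3 node4; do
>   scp .bashrc hduser@$h:~/
>   scp -r /opt/bigdata/hadoop hduser@$h:/opt/bigdata
> done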
-
4. config and start hadoop
-
config
-
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- fs.default.name is deprecated in hadoop 2.x; fs.defaultFS is the current name -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://node5:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:///app/hadoop/tmp</value>
  </property>
</configuration>
-
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.namenode.checkpoint.period</name>
    <value>3600</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///app/hadoop/hdfs/name</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///app/hadoop/hdfs/data</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>50</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>file:///app/hadoop/hdfs/namesecondary</value>
  </property>
</configuration>
-
yarn-site.xml
<?xml version="1.0"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>node5:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>node5:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>node5:8040</value>
  </property>
</configuration>
-
mapred-site.xml
<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
-
slaves
node4
node3
node2
node1
-
format and start
-
on node5 format hdfs
$ bin/hdfs namenode -format
# if the namenode is still in safemode: bin/hdfs dfsadmin -safemode leave
$ bin/hdfs dfsadmin -report
Configured Capacity: 5854310649856 (5.32 TB)
Present Capacity: 5532915458048 (5.03 TB)
DFS Remaining: 5532915384320 (5.03 TB)
DFS Used: 73728 (72 KB)
DFS Used%: 0.00%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0

-------------------------------------------------
Live datanodes (3):

Name: 192.168.120.154:50010 (node4)
Hostname: node4
Decommission Status : Normal
Configured Capacity: 1951699709952 (1.78 TB)
DFS Used: 24576 (24 KB)
Non DFS Used: 103953088512 (96.81 GB)
DFS Remaining: 1847746596864 (1.68 TB)
DFS Used%: 0.00%
DFS Remaining%: 94.67%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Wed Jul 08 08:50:45 CST 2015

Name: 192.168.120.152:50010 (node2)
Hostname: node2
Decommission Status : Normal
Configured Capacity: 1951307567104 (1.77 TB)
DFS Used: 24576 (24 KB)
Non DFS Used: 104823349248 (97.62 GB)
DFS Remaining: 1846484193280 (1.68 TB)
DFS Used%: 0.00%
DFS Remaining%: 94.63%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Wed Jul 08 08:50:45 CST 2015

Name: 192.168.120.153:50010 (node3)
Hostname: node3
Decommission Status : Normal
Configured Capacity: 1951303372800 (1.77 TB)
DFS Used: 24576 (24 KB)
Non DFS Used: 112618754048 (104.88 GB)
DFS Remaining: 1838684594176 (1.67 TB)
DFS Used%: 0.00%
DFS Remaining%: 94.23%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Wed Jul 08 08:50:45 CST 2015
-
on node5 start
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
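To confirm the daemons actually came up, jps on each node should list the expected processes (a quick check; PIDs will differ, and the SecondaryNameNode location assumes the default config):
# on node5
$ jps
... NameNode
... SecondaryNameNode
... ResourceManager
# on node2 node3 node4
$ jps
... DataNode
... NodeManager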
-
on node5 stop
$ sbin/stop-yarn.sh
$ sbin/stop-dfs.sh
-
5. setup zookeeper
-
topology
    node2            node3            node4
+-----------+    +-----------+    +-----------+
|           |    |           |    |           |
| ZooKeeper |    | ZooKeeper |    | ZooKeeper |
|           |    |           |    |           |
+-----------+    +-----------+    +-----------+
-
setup
-
copy software to node4
$ scp zookeeper-3.4.6.tar.gz hduser@node4:/opt/bigdata
$ cd /opt/bigdata
$ tar -zxf zookeeper-3.4.6.tar.gz
$ mv zookeeper-3.4.6 zookeeper
-
config zookeeper
$ pico /opt/bigdata/zookeeper/conf/zoo.cfg

tickTime=2000
dataDir=/app/hadoop/zookeeper/data
clientPort=2181
initLimit=10
syncLimit=5
server.4=node4:20010:20011
server.3=node3:20010:20011
server.2=node2:20010:20011
-
copy zookeeper to node3, node2
$ scp -r /opt/bigdata/zookeeper hduser@node3:/opt/bigdata
$ scp -r /opt/bigdata/zookeeper hduser@node2:/opt/bigdata
-
mkdir for zookeeper data
# on node2
$ sudo mkdir -p /app/hadoop/zookeeper/data
$ sudo chown -R hduser:hadoop /app/hadoop/zookeeper
# on node3
$ sudo mkdir -p /app/hadoop/zookeeper/data
$ sudo chown -R hduser:hadoop /app/hadoop/zookeeper
# on node4
$ sudo mkdir -p /app/hadoop/zookeeper/data
$ sudo chown -R hduser:hadoop /app/hadoop/zookeeper
-
config node's myid
# on node2
$ pico /app/hadoop/zookeeper/data/myid
2
# on node3
$ pico /app/hadoop/zookeeper/data/myid
3
# on node4
$ pico /app/hadoop/zookeeper/data/myid
4
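The same files can be written non-interactively (a sketch; each id must match its server.N line in zoo.cfg):
# on node2 / node3 / node4 respectively
$ echo 2 > /app/hadoop/zookeeper/data/myid
$ echo 3 > /app/hadoop/zookeeper/data/myid
$ echo 4 > /app/hadoop/zookeeper/data/myid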
-
start / status / stop
-
start zookeeper
# on node2
$ /opt/bigdata/zookeeper/bin/zkServer.sh start
# on node3
$ /opt/bigdata/zookeeper/bin/zkServer.sh start
# on node4
$ /opt/bigdata/zookeeper/bin/zkServer.sh start
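Since passphraseless ssh is already in place, all three servers can also be started from one shell (a sketch; same install path assumed on every node):
$ for h in node2 node3 node4; do ssh hduser@$h /opt/bigdata/zookeeper/bin/zkServer.sh start; done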
-
check zookeeper status
# on node2
$ /opt/bigdata/zookeeper/bin/zkServer.sh status
JMX enabled by default
Using config: /opt/bigdata/zookeeper/bin/../conf/zoo.cfg
Mode: follower
# on node3
$ /opt/bigdata/zookeeper/bin/zkServer.sh status
JMX enabled by default
Using config: /opt/bigdata/zookeeper/bin/../conf/zoo.cfg
Mode: follower
# on node4
$ /opt/bigdata/zookeeper/bin/zkServer.sh status
JMX enabled by default
Using config: /opt/bigdata/zookeeper/bin/../conf/zoo.cfg
Mode: leader
-
# use `echo` and `nc`
$ echo ruok | nc node2 2181
imok
$ echo status | nc node2 2181
Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
Clients:
 /192.168.120.154:47757[1](queued=0,recved=25564,sent=25564)
 /192.168.120.153:44776[1](queued=0,recved=25565,sent=25565)
 /192.168.120.152:54094[1](queued=0,recved=25565,sent=25565)
 /192.168.120.152:38021[0](queued=0,recved=1,sent=0)
 /192.168.120.152:54089[1](queued=0,recved=25648,sent=25662)
 /192.168.120.155:35662[1](queued=0,recved=25565,sent=25565)
 /192.168.120.153:44772[1](queued=0,recved=25566,sent=25566)

Latency min/avg/max: 0/0/36
Received: 170608
Sent: 170657
Connections: 7
Outstanding: 0
Zxid: 0x100000117
Mode: follower
Node count: 42

$ echo status | nc node3 2181
Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
Clients:
 /192.168.120.154:39911[1](queued=0,recved=227,sent=227)
 /192.168.120.152:48327[0](queued=0,recved=1,sent=0)

Latency min/avg/max: 0/0/14
Received: 235
Sent: 234
Connections: 2
Outstanding: 0
Zxid: 0x100000117
Mode: follower
Node count: 42

$ echo status | nc node4 2181
Zookeeper version: 3.4.6-1569965, built on 02/20/2014 09:09 GMT
Clients:
 /192.168.120.155:53241[1](queued=0,recved=29290,sent=29290)
 /192.168.120.153:40275[1](queued=0,recved=25644,sent=25652)
 /192.168.120.152:54308[1](queued=0,recved=25570,sent=25570)
 /192.168.120.155:53240[1](queued=0,recved=25568,sent=25568)
 /192.168.120.154:48195[1](queued=0,recved=25625,sent=25634)
 /192.168.120.152:38243[0](queued=0,recved=1,sent=0)
 /192.168.120.155:53234[1](queued=0,recved=31166,sent=31203)
 /192.168.120.154:48202[1](queued=0,recved=25568,sent=25568)

Latency min/avg/max: 0/0/110
Received: 218610
Sent: 218675
Connections: 8
Outstanding: 0
Zxid: 0x100000117
Mode: leader
Node count: 42
-
stop zookeeper
# on node2
$ /opt/bigdata/zookeeper/bin/zkServer.sh stop
# on node3
$ /opt/bigdata/zookeeper/bin/zkServer.sh stop
# on node4
$ /opt/bigdata/zookeeper/bin/zkServer.sh stop
-
6. setup hbase
-
topology
                      node5
                 +--------------+
                 |              |
                 |    Master    |
                 |              |
                 +--------------+
                  /      |      \
                 /       |       \
                /        |        \
     node2             node3             node4
+--------------+  +--------------+  +--------------+
|              |  |              |  |              |
|  ZooKeeper   |  |  ZooKeeper   |  |  ZooKeeper   |
| RegionServer |  | RegionServer |  | RegionServer |
|              |  |              |  |              |
+--------------+  +--------------+  +--------------+
-
setup
-
copy software to node5
$ scp hbase-1.0.1.1-bin.tar.gz hduser@node5:/opt/bigdata
$ cd /opt/bigdata
$ tar -zxf hbase-1.0.1.1-bin.tar.gz
$ mv hbase-1.0.1.1 hbase
-
config hbase
$ pico /opt/bigdata/hbase/conf/regionservers
node4
node3
node2

$ pico /opt/bigdata/hbase/conf/backup-masters
node2

$ pico /opt/bigdata/hbase/conf/hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>node4,node3,node2</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://node5:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
</configuration>
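Because the ensemble from section 5 is managed outside HBase, hbase-env.sh normally needs HBASE_MANAGES_ZK turned off as well (a sketch; JAVA_HOME as used elsewhere in this guide):
$ pico /opt/bigdata/hbase/conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre
# do not let HBase start its own ZooKeeper; use the external ensemble
export HBASE_MANAGES_ZK=false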
-
copy hbase to node4, node3, node2
$ scp -r /opt/bigdata/hbase hduser@node4:/opt/bigdata
$ scp -r /opt/bigdata/hbase hduser@node3:/opt/bigdata
$ scp -r /opt/bigdata/hbase hduser@node2:/opt/bigdata
-
start / stop
-
start
# on node5
$ /opt/bigdata/hbase/bin/start-hbase.sh
-
stop
# on node5
$ /opt/bigdata/hbase/bin/stop-hbase.sh
-
7. setup sqoop
-
topology
     node2                node3
+--------------+    +--------------+
|              |    |              |
| sqoop server |    | sqoop server |
|              |    |              |
+--------------+    +--------------+
-
copy software to node2
$ scp sqoop-1.99.6-bin-hadoop200.tar.gz hduser@node2:/opt/bigdata
$ cd /opt/bigdata
$ tar -zxf sqoop-1.99.6-bin-hadoop200.tar.gz
$ mv sqoop-1.99.6-bin-hadoop200 sqoop
-
config sqoop
# sqoop/server/conf/catalina.properties
# point the classpath entries at hadoop's jars:
#   hadoop's common and common's lib
#   hadoop's hdfs and hdfs's lib
#   hadoop's mapreduce and mapreduce's lib
#   hadoop's yarn and yarn's lib
$ sed 's@/usr/lib/hadoop/\*.jar@/opt/bigdata/hadoop/share/hadoop/common/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop/lib/\*.jar@/opt/bigdata/hadoop/share/hadoop/common/lib/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-hdfs/\*.jar@/opt/bigdata/hadoop/share/hadoop/hdfs/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-hdfs/lib/\*.jar@/opt/bigdata/hadoop/share/hadoop/hdfs/lib/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-mapreduce/\*.jar@/opt/bigdata/hadoop/share/hadoop/mapreduce/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-mapreduce/lib/\*.jar@/opt/bigdata/hadoop/share/hadoop/mapreduce/lib/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-yarn/\*.jar@/opt/bigdata/hadoop/share/hadoop/yarn/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties
$ sed 's@/usr/lib/hadoop-yarn/lib/\*.jar@/opt/bigdata/hadoop/share/hadoop/yarn/lib/\*.jar@' -i /opt/bigdata/sqoop/server/conf/catalina.properties

# sqoop/server/conf/sqoop.properties
# hadoop's conf dir
$ sed 's@/etc/hadoop/conf/@/opt/bigdata/hadoop/etc/hadoop/@' -i /opt/bigdata/sqoop/server/conf/sqoop.properties
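A quick sanity check that the substitutions landed (a sketch; the second grep should print nothing if every sed above matched):
$ grep -c '/opt/bigdata/hadoop' /opt/bigdata/sqoop/server/conf/catalina.properties
$ grep '/usr/lib/hadoop' /opt/bigdata/sqoop/server/conf/catalina.properties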
-
verify
$ /opt/bigdata/sqoop/bin/sqoop2-tool verify
...
Verification was successful.
Tool class org.apache.sqoop.tools.tool.VerifyTool has finished correctly
-
start / stop sqoop server
-
start
$ /opt/bigdata/sqoop/bin/sqoop2-server start
Sqoop home directory: /opt/bigdata/sqoop
Setting SQOOP_HTTP_PORT:     12000
Setting SQOOP_ADMIN_PORT:     12001
Using   CATALINA_OPTS:
Adding to CATALINA_OPTS:    -Dsqoop.http.port=12000 -Dsqoop.admin.port=12001
Using CATALINA_BASE:   /opt/bigdata/sqoop/server
Using CATALINA_HOME:   /opt/bigdata/sqoop/server
Using CATALINA_TMPDIR: /opt/bigdata/sqoop/server/temp
Using JRE_HOME:        /usr/lib/jvm/java-8-openjdk-amd64/jre
Using CLASSPATH:       /opt/bigdata/sqoop/server/bin/bootstrap.jar
-
stop
$ /opt/bigdata/sqoop/bin/sqoop2-server stop
-
sqoop2-shell
-
start
$ /opt/bigdata/sqoop/bin/sqoop2-shell
-
set server
sqoop:000> set server --host node2
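To confirm the shell is now pointed at the right server, show server is a built-in sqoop2-shell command (output shown as a sketch; exact wording may vary by version):
sqoop:000> show server --all
Server host: node2
Server port: 12000
Server webapp: sqoop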
-
version
# 1. version
sqoop:000> show version
client version:
  Sqoop 1.99.6 source revision 07244c3915975f26f03d9e1edf09ab7d06619bb8
  Compiled by root on Wed Apr 29 10:40:43 CST 2015
sqoop:000> show version --all
client version:
  Sqoop 1.99.6 source revision 07244c3915975f26f03d9e1edf09ab7d06619bb8
  Compiled by root on Wed Apr 29 10:40:43 CST 2015
server version:
  Sqoop 1.99.6 source revision 07244c3915975f26f03d9e1edf09ab7d06619bb8
  Compiled by root on Wed Apr 29 10:40:43 CST 2015
API versions:
  [v1]
-
connector
sqoop:000> show connector
+----+------------------------+---------+------------------------------------------------------+----------------------+
| Id | Name                   | Version | Class                                                | Supported Directions |
+----+------------------------+---------+------------------------------------------------------+----------------------+
| 1  | generic-jdbc-connector | 1.99.6  | org.apache.sqoop.connector.jdbc.GenericJdbcConnector | FROM/TO              |
| 2  | kite-connector         | 1.99.6  | org.apache.sqoop.connector.kite.KiteConnector        | FROM/TO              |
| 3  | hdfs-connector         | 1.99.6  | org.apache.sqoop.connector.hdfs.HdfsConnector        | FROM/TO              |
| 4  | kafka-connector        | 1.99.6  | org.apache.sqoop.connector.kafka.KafkaConnector      | TO                   |
+----+------------------------+---------+------------------------------------------------------+----------------------+
-
link - hdfs
sqoop:000> create link -c 3
Creating link for connector with id 3
Please fill following values to create new link object
Name: hdfs

Link configuration

HDFS URI: hdfs://node5:9000
Hadoop conf directory: /opt/bigdata/hadoop/etc/hadoop
New link was successfully created with validation status OK and persistent id 1
sqoop:000> show link
+----+------+--------------+----------------+---------+
| Id | Name | Connector Id | Connector Name | Enabled |
+----+------+--------------+----------------+---------+
| 1  | hdfs | 3            | hdfs-connector | true    |
+----+------+--------------+----------------+---------+
-
link - mysql
sqoop:000> create link -c 1
Creating link for connector with id 1
Please fill following values to create new link object
Name: mysql

Link configuration

JDBC Driver Class: com.mysql.jdbc.Driver
JDBC Connection String: jdbc:mysql://node3/sqoop
Username: sa
Password: **
JDBC Connection Properties:
There are currently 0 values in the map:
entry# protocol=tcp
There are currently 1 values in the map:
protocol = tcp
entry#
There were warnings while create or update, but saved successfully.
Warning message: Can't connect to the database with given credentials:
Communications link failure
The last packet sent successfully to the server was 0 milliseconds ago.
The driver has not received any packets from the server.
New link was successfully created with validation status WARNING and persistent id 2
sqoop:000> show link
+----+-------+--------------+------------------------+---------+
| Id | Name  | Connector Id | Connector Name         | Enabled |
+----+-------+--------------+------------------------+---------+
| 1  | hdfs  | 3            | hdfs-connector         | true    |
| 2  | mysql | 1            | generic-jdbc-connector | true    |
+----+-------+--------------+------------------------+---------+
-
link - sqlserver
sqoop:000> create link -c 1
Creating link for connector with id 1
Please fill following values to create new link object
Name: mssql

Link configuration

JDBC Driver Class: com.microsoft.sqlserver.jdbc.SQLServerDriver
JDBC Connection String: jdbc:sqlserver://192.168.120.151
Username: sa
Password: **
JDBC Connection Properties:
There are currently 0 values in the map:
entry# protocol=tcp
There are currently 1 values in the map:
protocol = tcp
entry#
New link was successfully created with validation status OK and persistent id 3
sqoop:000> show link
+----+-------+--------------+------------------------+---------+
| Id | Name  | Connector Id | Connector Name         | Enabled |
+----+-------+--------------+------------------------+---------+
| 1  | hdfs  | 3            | hdfs-connector         | true    |
| 2  | mysql | 1            | generic-jdbc-connector | true    |
| 3  | mssql | 1            | generic-jdbc-connector | true    |
+----+-------+--------------+------------------------+---------+
-
create sqoop job to ingest data from mysql to hdfs
sqoop:000> create job --from 2 --to 1
Creating job for links with from id 2 and to id 1
Please fill following values to create new job object
Name: mysql_to_hdfs

From database configuration

Schema name: sqoop
Table name: employee
Table SQL statement:
Table column names:
Partition column name:
Null value allowed for the partition column:
Boundary query:

Incremental read

Check column:
Last value:

To HDFS configuration

Override null value:
Null value:
Output format:
  0 : TEXT_FILE
  1 : SEQUENCE_FILE
Choose: 0
Compression format:
  0 : NONE
  1 : DEFAULT
  2 : DEFLATE
  3 : GZIP
  4 : BZIP2
  5 : LZO
  6 : LZ4
  7 : SNAPPY
  8 : CUSTOM
Choose: 0
Custom compression format:
Output directory: /user/hduser
Append mode:

Throttling resources

Extractors:
Loaders:
New job was successfully created with validation status OK and persistent id 1
sqoop:000> show job
+----+---------------+----------------+--------------+---------+
| Id | Name          | From Connector | To Connector | Enabled |
+----+---------------+----------------+--------------+---------+
| 1  | mysql_to_hdfs | 1              | 3            | true    |
+----+---------------+----------------+--------------+---------+
-
start job and check status
sqoop:000> start job -j 1
Submission details
Job ID: 1
Server URL: http://node2:12000/sqoop/
Created by: hduser
Creation date: 2015-07-09 17:26:59 CST
Lastly updated by: hduser
External ID: job_1436433861773_0001
        http://node5:8088/proxy/application_1436433861773_0001/
2015-07-09 17:26:59 CST: BOOTING  - Progress is not available
sqoop:000> status job -j 1
-
8. setup spark
-
topology
                      node5
                 +--------------+
                 |              |
                 | spark master |
                 |              |
                 +--------------+
                  /      |      \
                 /       |       \
                /        |        \
     node2             node3             node4
+--------------+  +--------------+  +--------------+
|              |  |              |  |              |
| spark worker |  | spark worker |  | spark worker |
|              |  |              |  |              |
+--------------+  +--------------+  +--------------+
-
copy software to node5
$ scp spark-1.3.1-bin-hadoop2.6.tgz hduser@node5:/opt/bigdata
$ cd /opt/bigdata
$ tar -zxf spark-1.3.1-bin-hadoop2.6.tgz
$ mv spark-1.3.1-bin-hadoop2.6 spark
$ scp -r /opt/bigdata/spark hduser@node4:/opt/bigdata/spark
$ scp -r /opt/bigdata/spark hduser@node3:/opt/bigdata/spark
$ scp -r /opt/bigdata/spark hduser@node2:/opt/bigdata/spark
-
start spark master on node5
$ /opt/bigdata/spark/sbin/start-master.sh
-
start spark worker on node4 node3 node2
# on node4
$ /opt/bigdata/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://node5:7077
# on node3
$ /opt/bigdata/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://node5:7077
# on node2
$ /opt/bigdata/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://node5:7077
-
demo
$ /opt/bigdata/spark/bin/spark-submit \
    --class org.apache.spark.examples.SparkPi \
    --master yarn-cluster \
    --num-executors 3 \
    --driver-memory 4g \
    --executor-memory 2g \
    --executor-cores 1 \
    --queue thequeue \
    /opt/bigdata/spark/lib/spark-examples*.jar \
    10
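Note the demo submits to YARN (--master yarn-cluster), not to the standalone master started above, and --queue thequeue assumes a YARN queue of that name exists (drop it to use the default queue). To run the same example on the standalone cluster instead (a sketch, same paths):
$ /opt/bigdata/spark/bin/spark-submit \
    --class org.apache.spark.examples.SparkPi \
    --master spark://node5:7077 \
    /opt/bigdata/spark/lib/spark-examples*.jar \
    10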
-
fixed
-
problem connecting to server
-
log on node3
2015-07-08 08:20:39,044 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: Problem connecting to server: node5/192.168.120.155:9000
2015-07-08 08:20:48,046 INFO org.apache.hadoop.ipc.Client: Retrying connect to server: node5/192.168.120.155:9000. Already tried 3 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
-
hdfs service failed to start, issues with datanode and namenode
Harsh J: Sorry, should've sent you a direct string... I mean something like this:

127.0.0.1 localhost johniv-able
# 127.0.1.1 johniv-able
# ^^^^^^^^^^^^^^^^^^^^^

i.e. comment out the 127.0.1.1 <hostname> line in /etc/hosts, so the hostname resolves to the machine's real address instead of the loopback alias and the datanodes can reach the namenode.
-
ERROR 2003 (HY000): Can’t connect to MySQL server on ‘node3’ (61)
-
# 1. check status
$ netstat -tulpen
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address   State    User   Inode    PID/Program name
tcp        0      0 0.0.0.0:22              0.0.0.0:*         LISTEN   0      20360    -
tcp        0      0 0.0.0.0:3306            0.0.0.0:*         LISTEN   121    31073    -
tcp        0      0 0.0.0.0:50010           0.0.0.0:*         LISTEN   1001   29143    4193/java
tcp        0      0 0.0.0.0:50020           0.0.0.0:*         LISTEN   1001   33985    4193/java
tcp        0      0 0.0.0.0:50075           0.0.0.0:*         LISTEN   1001   29188    4193/java
tcp        0      0 127.0.0.1:48933         0.0.0.0:*         LISTEN   1001   29149    4193/java
tcp        0      0 127.0.0.1:5939          0.0.0.0:*         LISTEN   0      18701    -
tcp        0      0 127.0.0.1:631           0.0.0.0:*         LISTEN   0      120296   -
tcp6       0      0 192.168.120.153:16020   :::*              LISTEN   1001   33404    4416/java
tcp6       0      0 ::1:631                 :::*              LISTEN   0      120295   -
tcp6       0      0 :::13562                :::*              LISTEN   1001   33608    4840/java
tcp6       0      0 :::16030                :::*              LISTEN   1001   33425    4416/java
tcp6       0      0 :::22                   :::*              LISTEN   0      20362    -
tcp6       0      0 :::52600                :::*              LISTEN   1001   41017    4840/java
tcp6       0      0 :::8040                 :::*              LISTEN   1001   33604    4840/java
tcp6       0      0 :::8042                 :::*              LISTEN   1001   33609    4840/java
udp        0      0 0.0.0.0:41761           0.0.0.0:*                  107    16926    -
udp        0      0 0.0.0.0:5353            0.0.0.0:*                  107    16924    -
udp        0      0 0.0.0.0:631             0.0.0.0:*                  0      18693    -
udp6       0      0 :::52756                :::*                       107    16927    -
udp6       0      0 :::5353                 :::*                       107    16925    -
# 2. refer to the `my.cnf` edit below
-
edit my.cnf
$ sudo pico /etc/alternatives/my.cnf
or
$ sudo pico /etc/mysql/my.cnf

# You can copy this to one of:
# - "/etc/mysql/my.cnf" to set global options,
# - "~/.my.cnf" to set user-specific options.
[mysqld]
bind-address = 0.0.0.0
-
grant privileges
mysql> grant all privileges on *.* to 'sa'@'%' identified by 'sa';
-
restart mysql service
$ /etc/init.d/mysql restart
or
$ service mysql restart
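After the restart, remote access can be verified from another node (a sketch; assumes the mysql client is installed there and the sa grant above has been applied):
$ mysql -h node3 -u sa -p -e 'select 1'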
-
Connection refused
-
java.net.ConnectException
sqoop:000> start job -j 1
2015-07-09 16:30:32 CST: FAILURE_ON_SUBMIT
Exception: java.net.ConnectException: Call From node2/192.168.120.152 to node5:8040 failed on connection exception:
java.net.ConnectException: Connection refused;
For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
-
fixed
# yarn not started
# on node5
$ cd /opt/bigdata/hadoop/etc/hadoop
$ grep 8040 ./*
./yarn-site.xml:    <value>node5:8040</value>
# so start yarn
$ /opt/bigdata/hadoop/sbin/start-yarn.sh
-
Class not found
-
Error message: Class not found
sqoop:000> create link -c 1
Creating link for connector with id 1
Please fill following values to create new link object
Name: mssql

Link configuration

JDBC Driver Class: com.microsoft.jdbc.sqlserver
JDBC Connection String: jdbc:sqlserver://192.168.120.151
Username: sa
Password: **
JDBC Connection Properties:
There are currently 0 values in the map:
entry# protocol=tcp
There are currently 1 values in the map:
protocol = tcp
entry#
There are issues with entered data, please revise your input:
Name: mssql

Link configuration

Error message: Class not found
JDBC Driver Class: com.microsoft.jdbc.sqlserver.SQLServerDriver
-
fixed
# 1. find `sqljdbc4.jar`
$ find / -name sqljdbc4.jar 2>/dev/null
# 2. list the jar's contents and check the package path
$ unzip -l sqljdbc4.jar | grep -i sqlserverdriver
# 3. so sqlserver's driver class is
#    com.microsoft.sqlserver.jdbc.SQLServerDriver
-