steps
-
topology
+--------------+ +--------------+ +--------------+
|              | |              | |              |
|    node1     | |    node2     | |    node3     |
|              | |              | |              |
+--------------+ +--------------+ +--------------+
-
download mysql and mssql connectors
-
install the jdbc drivers; see my post 2015-05-25-apache-hive-on-mac-osx-yosemite for the connector and driver downloads and documentation
$ curl -L 'http://www.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.35.tar.gz/from/http://mysql.he.net/' | tar xz
-
$ curl -L 'http://download.microsoft.com/download/0/2/A/02AAE597-3865-456C-AE7F-613F99F850A8/sqljdbc_4.0.2206.100_enu.tar.gz' | tar xz
-
-
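download sqoop
the tarball itself can come from the apache archive; the url below is an assumption, any mirror carrying the sqoop 1.4.6 hadoop-2.0.4-alpha build works
# on mac
$ curl -LO 'http://archive.apache.org/dist/sqoop/1.4.6/sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz'
-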
copy sqoop to node2
$ scp sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz root@node2:/root/
-
installing sqoop
-
ssh to node2
# on mac
$ ssh root@node2
-
unzip sqoop
# on node2
$ tar -zxf sqoop-1.4.6*.gz
-
rename folder
# on node2
$ mv sqoop-1.4.6.bin__hadoop-2.0.4-alpha sqoop1
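optionally, put sqoop on the PATH so it can run from anywhere; a minimal sketch, assuming bash and a root login on node2 (the rest of this post simply calls ./sqoop from the bin directory instead):
# on node2 (optional)
$ echo 'export SQOOP_HOME=/root/sqoop1' >> ~/.bashrc
$ echo 'export PATH=$PATH:$SQOOP_HOME/bin' >> ~/.bashrc
$ source ~/.bashrc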
-
-
installing connectors
-
mysql connector
# on mac
$ cd mysql-connector-java-5.1.35
$ scp mysql-connector-java-5.1.35-bin.jar root@node2:/root/sqoop1/lib
-
mssql connector
# on mac
$ cd sqljdbc_4.0/enu
$ scp sqljdbc4.jar root@node2:/root/sqoop1/lib
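a quick check that both jars actually landed in sqoop's lib directory:
# on node2
$ ls /root/sqoop1/lib | grep -iE 'mysql|sqljdbc'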
-
-
config sqoop
-
check /usr/lib on node2
# on node2
$ ls /usr/lib/
anaconda-runtime  hbase       java-ext     rpm
ConsoleKit        hive        jvm          ruby
cups              java        jvm-commmon  scala
deploy            java-1.3.1  jvm-exports  sendmail
games             java-1.4.0  jvm-private  sendmail.postfix
gcc               java-1.4.1  locale       sparkR
hadoop            java-1.4.2  lsb          transwarp
hadoop-hdfs       java-1.5.0  ngmr         transwarp-manager
hadoop-mapreduce  java-1.6.0  ngmr-shell   yum-plugins
hadoop-yarn       java-1.7.0  python2.6    zookeeper
-
config sqoop-env.sh
# on node2, create it from the template that ships in sqoop's conf directory
$ cd /root/sqoop1/conf
$ cp sqoop-env-template.sh sqoop-env.sh
$ nano sqoop-env.sh

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# included in all the hadoop scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*

# Set Hadoop-specific environment variables here.

#Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=/usr/lib/hadoop

#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce

#set the path to where bin/hbase is available
export HBASE_HOME=/usr/lib/hbase

#Set the path to where bin/hive is available
export HIVE_HOME=/usr/lib/hive

#Set the path for where zookeper config dir is
export ZOOCFGDIR=/usr/lib/zookeeper/conf
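a quick sanity check that sqoop starts and picks up its environment; warnings about missing hcatalog or accumulo homes can be ignored for these imports:
# on node2
$ cd /root/sqoop1
$ ./bin/sqoop version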
-
-
import data to hdfs
-
mssql to hdfs
$ ./sqoop import-all-tables --connect 'jdbc:sqlserver://mssqlhost;database=mydb;username=myname;password=mypass' -m 1
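if the import errors out, sqoop's list-tables is a quick way to confirm the driver and connect string before retrying (same credentials as above):
# on node2
$ ./sqoop list-tables --connect 'jdbc:sqlserver://mssqlhost;database=mydb;username=myname;password=mypass'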
-
mysql to hdfs
$ ./sqoop import --connect jdbc:mysql://mysqlhost/mydb --username myname --password mypass --table mytable -m 1
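with no --target-dir, sqoop writes under the hdfs home of the calling user; a quick look at the result, assuming sqoop's default /user/root/<table> layout:
# on node2
$ hdfs dfs -ls /user/root/mytable
$ hdfs dfs -cat /user/root/mytable/part-m-00000 | head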
-
-
import data to hive
-
single command
# on node2
$ ./sqoop import --connect 'jdbc:sqlserver://mssqlhost;database=mydb;username=myname;password=mypass' \
    --table employee --hive-table emps --hive-import
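the password in the connect string ends up in shell history and the job configuration; sqoop's --password-file option reads it from hdfs instead. a sketch, with the hdfs path being an assumption:
# on node2 (optional: keep the password off the command line)
$ echo -n 'mypass' > mssql.pwd
$ hdfs dfs -put mssql.pwd /user/root/mssql.pwd
$ hdfs dfs -chmod 400 /user/root/mssql.pwd
$ ./sqoop import --connect 'jdbc:sqlserver://mssqlhost;database=mydb' \
    --username myname --password-file /user/root/mssql.pwd \
    --table employee --hive-table emps --hive-import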
-
permission denied
2015-09-06 10:44:18,348 INFO hive.HiveImport: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Got exception: org.apache.hadoop.security.AccessControlException Permission denied: user=root, access=EXECUTE, inode="/inceptorsql1/user/hive/warehouse":hive:hadoop:drwx------
-
fix permission denied
# on node2
$ sudo -u hdfs hdfs dfs -chmod 777 /inceptorsql1/user/hive/warehouse
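chmod 777 opens the warehouse to every user; a narrower sketch, assuming root can be added to the hadoop group that owns the directory:
# on node2, less permissive alternative
$ sudo -u hdfs hdfs dfs -chmod -R 770 /inceptorsql1/user/hive/warehouse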
-
check data in hive
# on node2
$ hive
transwarp> show databases;
OK
default
Time taken: 3.291 seconds, Fetched: 1 row(s)
transwarp> use default;
OK
Time taken: 0.046 seconds
transwarp> show tables;
OK
gsswdjb
Time taken: 0.059 seconds, Fetched: 1 row(s)
transwarp> select * from gsswdjb limit 10;
OK
... data ...
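a row count against the source table makes a quick sanity check in the same session; it is also how the duplicated-rows problem below first shows up:
transwarp> select count(*) from gsswdjb;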
-
tableau
# 1. install tableau
# 2. install the hortonworks hive odbc driver
# 3. connect with `hortonworks hadoop hive`
#    ip: node2  port: 10000
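before pointing tableau at the cluster, it helps to confirm something answers on that port; beeline ships with hive (connect string assumed, inceptor's hiveserver2 setup may differ):
# on node2
$ beeline -u 'jdbc:hive2://node2:10000' -e 'show tables;'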
-
data columns with newline characters
# on node2
# imported 160000 records from sqlserver but got 320000+ records in hive:
# newline characters inside column dd were splitting each affected row
# fixed by replacing CR and LF in the query itself
$ ./sqoop import --connect 'jdbc:sqlserver://sqlhost;database=mydb;username=myuser;password=mypass' \
    --query "select aa, bb, cc, REPLACE(REPLACE(dd, CHAR(13), '{13}'), CHAR(10), '{10}') dd from mytable where \$CONDITIONS" \
    --hive-table mytable --hive-import --target-dir /inceptorsql1/user/hive/warehouse/mytable -m 1
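sqoop can also handle stray delimiters itself, which avoids the sql-side REPLACE; --hive-drop-import-delims and --hive-delims-replacement are documented sqoop options, though untested against this sqlserver setup:
# on node2, alternative: strip \n, \r and \01 from string fields during import
$ ./sqoop import --connect 'jdbc:sqlserver://sqlhost;database=mydb;username=myuser;password=mypass' \
    --table mytable --hive-table mytable --hive-import \
    --hive-drop-import-delims -m 1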
-