
How to log connection details denied by the firewall

# Env: RHEL 7

[root@cdh-vm ~]# firewall-cmd  --get-log-denied
off
[root@cdh-vm ~]# firewall-cmd  --set-log-denied=all
success
[root@cdh-vm ~]# firewall-cmd  --get-log-denied
all

/var/log/messages:

Jan 25 06:37:23 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=32184 DF PROTO=TCP SPT=54142 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
Jan 25 06:37:23 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=41738 DF PROTO=TCP SPT=54144 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
Jan 25 06:37:23 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=35171 DF PROTO=TCP SPT=54146 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
Jan 25 06:37:23 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=43104 DF PROTO=TCP SPT=54148 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
Jan 25 06:37:24 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=28193 DF PROTO=TCP SPT=54150 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
Jan 25 06:37:33 cdh-vm kernel: FINAL_REJECT: IN=enp0s3 OUT= MAC=08:00:27:8e:ba:87:08:00:27:d5:2f:09:08:00 SRC=192.168.56.202 DST=192.168.56.10 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=31855 DF PROTO=TCP SPT=54152 DPT=7180 WINDOW=29200 RES=0x00 SYN URGP=0 
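
Once --set-log-denied is enabled, every rejected packet is written to /var/log/messages with the FINAL_REJECT prefix, which makes the entries easy to filter. A minimal sketch (assuming the default rsyslog/journald setup on RHEL 7):

# show only packets rejected by firewalld
grep FINAL_REJECT /var/log/messages

# or watch them live from the kernel log
journalctl -k -f | grep FINAL_REJECT

# revert to the default once troubleshooting is done
firewall-cmd --set-log-denied=off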


Configure EM Express for Oracle 12.2



SQL> select dbms_xdb_config.getHttpsPort() from dual;

DBMS_XDB_CONFIG.GETHTTPSPORT()
------------------------------
                              0

SQL> select dbms_xdb.getHttpPort() from dual;

DBMS_XDB.GETHTTPPORT()
----------------------
                      0

SQL> exec dbms_xdb_config.sethttpsport(5500);

PL/SQL procedure successfully completed.

SQL> select dbms_xdb_config.getHttpsPort() from dual;

DBMS_XDB_CONFIG.GETHTTPSPORT()
------------------------------
                          5500

SQL> !lsnrctl status |grep -i http
  (DESCRIPTION=(ADDRESS=(PROTOCOL=tcps)(HOST=vmxdb01.dbaglobe.com)(PORT=5500))(Security=(my_wallet_directory=/u01/app/oracle/admin/orcl/xdb_wallet))(Presentation=HTTP)(Session=RAW))

SQL> select dbms_xdb.getHttpPort() from dual;

DBMS_XDB.GETHTTPPORT()
----------------------
                     0
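
With the HTTPS port set to 5500 and registered with the listener as shown above, EM Express should be reachable from a browser at the /em path (log in as a user granted EM_EXPRESS_BASIC or EM_EXPRESS_ALL):

https://vmxdb01.dbaglobe.com:5500/em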


Use Sqoop to import into Hive tables (source is a view)

[donghua@cdh-vm test_db-master]$ ls -l /opt/cloudera/parcels/CDH/lib/sqoop/lib/
total 0
lrwxrwxrwx 1 root root 35 Nov  9 13:49 ant-contrib-1.0b3.jar -> ../../../jars/ant-contrib-1.0b3.jar
lrwxrwxrwx 1 root root 40 Nov  9 13:49 ant-eclipse-1.0-jvm1.2.jar -> ../../../jars/ant-eclipse-1.0-jvm1.2.jar
lrwxrwxrwx 1 root root 41 Nov  9 13:42 avro-mapred-hadoop2.jar -> ../../../lib/avro/avro-mapred-hadoop2.jar
lrwxrwxrwx 1 root root 26 Nov  9 13:42 avro.jar -> ../../../lib/avro/avro.jar
lrwxrwxrwx 1 root root 35 Nov  9 13:49 commons-codec-1.4.jar -> ../../../jars/commons-codec-1.4.jar
lrwxrwxrwx 1 root root 40 Nov  9 13:49 commons-compress-1.4.1.jar -> ../../../jars/commons-compress-1.4.1.jar
lrwxrwxrwx 1 root root 32 Nov  9 13:49 commons-io-1.4.jar -> ../../../jars/commons-io-1.4.jar
lrwxrwxrwx 1 root root 36 Nov  9 13:49 commons-jexl-2.1.1.jar -> ../../../jars/commons-jexl-2.1.1.jar
lrwxrwxrwx 1 root root 35 Nov  9 13:49 commons-lang3-3.4.jar -> ../../../jars/commons-lang3-3.4.jar
lrwxrwxrwx 1 root root 39 Nov  9 13:49 commons-logging-1.1.3.jar -> ../../../jars/commons-logging-1.1.3.jar
lrwxrwxrwx 1 root root 30 Nov  9 13:49 fastutil-6.3.jar -> ../../../jars/fastutil-6.3.jar
lrwxrwxrwx 1 root root 33 Nov  9 13:49 hsqldb-1.8.0.10.jar -> ../../../jars/hsqldb-1.8.0.10.jar
lrwxrwxrwx 1 root root 43 Nov  9 13:49 jackson-annotations-2.3.1.jar -> ../../../jars/jackson-annotations-2.3.1.jar
lrwxrwxrwx 1 root root 36 Nov  9 13:49 jackson-core-2.3.1.jar -> ../../../jars/jackson-core-2.3.1.jar
lrwxrwxrwx 1 root root 40 Nov  9 13:49 jackson-core-asl-1.8.8.jar -> ../../../jars/jackson-core-asl-1.8.8.jar
lrwxrwxrwx 1 root root 40 Nov  9 13:49 jackson-databind-2.3.1.jar -> ../../../jars/jackson-databind-2.3.1.jar
lrwxrwxrwx 1 root root 42 Nov  9 13:49 jackson-mapper-asl-1.8.8.jar -> ../../../jars/jackson-mapper-asl-1.8.8.jar
lrwxrwxrwx 1 root root 36 Nov  9 13:42 kite-data-core.jar -> ../../../lib/kite/kite-data-core.jar
lrwxrwxrwx 1 root root 36 Nov  9 13:42 kite-data-hive.jar -> ../../../lib/kite/kite-data-hive.jar
lrwxrwxrwx 1 root root 41 Nov  9 13:42 kite-data-mapreduce.jar -> ../../../lib/kite/kite-data-mapreduce.jar
lrwxrwxrwx 1 root root 47 Nov  9 13:42 kite-hadoop-compatibility.jar -> ../../../lib/kite/kite-hadoop-compatibility.jar
lrwxrwxrwx 1 root root 29 Nov  9 13:49 opencsv-2.3.jar -> ../../../jars/opencsv-2.3.jar
lrwxrwxrwx 1 root root 31 Nov  9 13:49 paranamer-2.3.jar -> ../../../jars/paranamer-2.3.jar
lrwxrwxrwx 1 root root 37 Nov  9 13:42 parquet-avro.jar -> ../../../lib/parquet/parquet-avro.jar
lrwxrwxrwx 1 root root 39 Nov  9 13:42 parquet-column.jar -> ../../../lib/parquet/parquet-column.jar
lrwxrwxrwx 1 root root 39 Nov  9 13:42 parquet-common.jar -> ../../../lib/parquet/parquet-common.jar
lrwxrwxrwx 1 root root 41 Nov  9 13:42 parquet-encoding.jar -> ../../../lib/parquet/parquet-encoding.jar
lrwxrwxrwx 1 root root 39 Nov  9 13:42 parquet-format.jar -> ../../../lib/parquet/parquet-format.jar
lrwxrwxrwx 1 root root 39 Nov  9 13:42 parquet-hadoop.jar -> ../../../lib/parquet/parquet-hadoop.jar
lrwxrwxrwx 1 root root 40 Nov  9 13:42 parquet-jackson.jar -> ../../../lib/parquet/parquet-jackson.jar
lrwxrwxrwx 1 root root 33 Nov  9 13:49 slf4j-api-1.7.5.jar -> ../../../jars/slf4j-api-1.7.5.jar
lrwxrwxrwx 1 root root 37 Nov  9 13:49 snappy-java-1.0.4.1.jar -> ../../../jars/snappy-java-1.0.4.1.jar
lrwxrwxrwx 1 root root 24 Nov  9 13:49 xz-1.0.jar -> ../../../jars/xz-1.0.jar
[donghua@cdh-vm test_db-master]$ 
[donghua@cdh-vm test_db-master]$ sudo ln -s /usr/share/java/mysql-connector-java.jar /opt/cloudera/parcels/CDH/lib/sqoop/lib/
[sudo] password for donghua: 
[donghua@cdh-vm test_db-master]$ readlink /opt/cloudera/parcels/CDH/lib/sqoop/lib/mysql-connector-java.jar
/usr/share/java/mysql-connector-java.jar
[donghua@cdh-vm test_db-master]$ 
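
A quick sanity check is worthwhile here: if the mysql-connector-java package were missing, the new symlink would be dangling and Sqoop would still fail to load the driver. A small sketch using readlink -e, which only succeeds when the link target actually exists:

readlink -e /opt/cloudera/parcels/CDH/lib/sqoop/lib/mysql-connector-java.jar \
  || echo "mysql-connector-java.jar target is missing"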


MariaDB [(none)]> create user employee_user identified by 'password';
Query OK, 0 rows affected (0.07 sec)

MariaDB [(none)]> grant all on employees.* to employee_user;
Query OK, 0 rows affected (0.04 sec)

MariaDB [(none)]> show grants for employee_user;
+--------------------------------------------------------------------------------------------------------------+
| Grants for employee_user@%                                                                                   |
+--------------------------------------------------------------------------------------------------------------+
| GRANT USAGE ON *.* TO 'employee_user'@'%' IDENTIFIED BY PASSWORD '*2470C0C06DEE42FD1618BB99005ADCA2EC9D1E19' |
| GRANT ALL PRIVILEGES ON `employees`.* TO 'employee_user'@'%'                                                |
+--------------------------------------------------------------------------------------------------------------+
2 rows in set (0.00 sec)
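
The grant above lets employee_user connect from any host ('%'). For a tighter setup, the account can be limited to the lab subnet instead; the host pattern below is an assumption for this environment:

create user 'employee_user'@'192.168.56.%' identified by 'password';
grant all on employees.* to 'employee_user'@'192.168.56.%';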

[donghua@cdh-vm test_db-master]$ sqoop list-databases --connect jdbc:mysql://cdh-vm.dbaglobe.com --username employee_user --password password 
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:32:56 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:32:56 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:32:56 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
information_schema
employees

[donghua@cdh-vm test_db-master]$ sqoop list-tables --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password 
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:33:17 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:33:17 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:33:17 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
current_dept_emp
departments
dept_emp
dept_emp_latest_date
dept_manager
employees
salaries
titles
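
To address the "Setting your password on the command-line is insecure" warning, Sqoop can also read the password from a protected file via --password-file (the file name below is just an example):

echo -n "password" > /home/donghua/.mysql.pwd
chmod 400 /home/donghua/.mysql.pwd
sqoop list-tables --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user --password-file file:///home/donghua/.mysql.pwd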

[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:37:48 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:37:48 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:37:49 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:37:49 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:37:49 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:37:49 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:37:49 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/f0cac41ee0eb9df573aa4341b36a671d/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:37:51 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/f0cac41ee0eb9df573aa4341b36a671d/current_dept_emp.jar
18/01/26 23:37:51 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:37:51 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:37:51 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:37:51 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:37:51 ERROR tool.ImportTool: Import failed: No primary key could be found for table current_dept_emp. Please specify one with --split-by or perform a sequential import with '-m 1'.


[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp -m 1
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:38:08 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:38:08 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:38:08 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:38:08 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:38:09 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:38:09 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:38:09 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/3cb418ffe5487ad8ed8b36689ec598f4/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:38:10 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/3cb418ffe5487ad8ed8b36689ec598f4/current_dept_emp.jar
18/01/26 23:38:11 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:38:11 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:38:11 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:38:11 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:38:11 INFO mapreduce.ImportJobBase: Beginning import of current_dept_emp
18/01/26 23:38:11 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/26 23:38:12 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/26 23:38:12 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/26 23:38:17 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/26 23:38:18 INFO mapreduce.JobSubmitter: number of splits:1
18/01/26 23:38:18 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0001
18/01/26 23:38:19 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0001
18/01/26 23:38:19 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0001/
18/01/26 23:38:19 INFO mapreduce.Job: Running job: job_1517023991003_0001
18/01/26 23:38:30 INFO mapreduce.Job: Job job_1517023991003_0001 running in uber mode : false
18/01/26 23:38:30 INFO mapreduce.Job:  map 0% reduce 0%
18/01/26 23:38:42 INFO mapreduce.Job:  map 100% reduce 0%
18/01/26 23:38:43 INFO mapreduce.Job: Job job_1517023991003_0001 completed successfully
18/01/26 23:38:43 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=173876
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=10110817
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters 
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=8922
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=8922
Total vcore-milliseconds taken by all map tasks=8922
Total megabyte-milliseconds taken by all map tasks=13704192
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=109
CPU time spent (ms)=3330
Physical memory (bytes) snapshot=281448448
Virtual memory (bytes) snapshot=2788491264
Total committed heap usage (bytes)=246939648
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=10110817
18/01/26 23:38:43 INFO mapreduce.ImportJobBase: Transferred 9.6424 MB in 31.2284 seconds (316.1811 KB/sec)
18/01/26 23:38:43 INFO mapreduce.ImportJobBase: Retrieved 300024 records.
[donghua@cdh-vm test_db-master]$ hdfs dfs -ls 
Found 3 items
drwx------   - donghua supergroup          0 2018-01-26 23:38 .staging
drwxr-xr-x   - donghua supergroup          0 2018-01-26 23:38 current_dept_emp
-rw-r--r--   1 donghua supergroup         15 2018-01-20 04:41 test.csv
[donghua@cdh-vm test_db-master]$ 
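
With a single mapper the import lands as one part file under current_dept_emp; a quick way to eyeball a few imported rows (the part file name assumes the default map-output naming):

hdfs dfs -ls current_dept_emp
hdfs dfs -cat current_dept_emp/part-m-00000 | head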


0: jdbc:hive2://localhost:10000/default> create database employees;
INFO  : Compiling command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165): create database employees
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165); Time taken: 0.043 seconds
INFO  : Executing command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165): create database employees
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165); Time taken: 0.182 seconds
INFO  : OK
No rows affected (0.351 seconds)

0: jdbc:hive2://localhost:10000/default> !sh hdfs dfs -ls /user/hive/warehouse/
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Found 1 items
drwxrwxrwt   - anonymous hive          0 2018-01-26 23:46 /user/hive/warehouse/employees.db

[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp --split-by=emp_no --hive-import --create-hive-table --hive-table=employees.current_dept_emp --warehouse-dir=/user/hive/warehouse
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:56:32 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:56:32 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:56:32 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/26 23:56:32 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/26 23:56:32 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/26 23:56:32 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/26 23:56:32 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/26 23:56:32 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/26 23:56:32 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/26 23:56:32 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/26 23:56:32 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:56:32 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:56:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:56:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:56:33 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/35ced35e8590fbbd798fa058e0584fed/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:56:35 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/35ced35e8590fbbd798fa058e0584fed/current_dept_emp.jar
18/01/26 23:56:35 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:56:35 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:56:35 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:56:35 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:56:35 INFO mapreduce.ImportJobBase: Beginning import of current_dept_emp
18/01/26 23:56:35 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/26 23:56:36 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/26 23:56:36 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/26 23:56:41 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/26 23:56:41 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`emp_no`), MAX(`emp_no`) FROM `current_dept_emp`
18/01/26 23:56:41 INFO db.IntegerSplitter: Split size: 122499; Num splits: 4 from: 10001 to: 499999
18/01/26 23:56:42 INFO mapreduce.JobSubmitter: number of splits:4
18/01/26 23:56:42 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0003
18/01/26 23:56:42 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0003
18/01/26 23:56:42 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0003/
18/01/26 23:56:42 INFO mapreduce.Job: Running job: job_1517023991003_0003
18/01/26 23:56:50 INFO mapreduce.Job: Job job_1517023991003_0003 running in uber mode : false
18/01/26 23:56:50 INFO mapreduce.Job:  map 0% reduce 0%
18/01/26 23:56:58 INFO mapreduce.Job:  map 25% reduce 0%
18/01/26 23:57:03 INFO mapreduce.Job:  map 50% reduce 0%
18/01/26 23:57:08 INFO mapreduce.Job:  map 75% reduce 0%
18/01/26 23:57:13 INFO mapreduce.Job:  map 100% reduce 0%
18/01/26 23:57:14 INFO mapreduce.Job: Job job_1517023991003_0003 completed successfully
18/01/26 23:57:14 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=698232
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=464
HDFS: Number of bytes written=10110817
HDFS: Number of read operations=16
HDFS: Number of large read operations=0
HDFS: Number of write operations=8
Job Counters 
Launched map tasks=4
Other local map tasks=4
Total time spent by all maps in occupied slots (ms)=17721
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=17721
Total vcore-milliseconds taken by all map tasks=17721
Total megabyte-milliseconds taken by all map tasks=27219456
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=464
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=313
CPU time spent (ms)=8810
Physical memory (bytes) snapshot=927260672
Virtual memory (bytes) snapshot=11156475904
Total committed heap usage (bytes)=836239360
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=10110817
18/01/26 23:57:14 INFO mapreduce.ImportJobBase: Transferred 9.6424 MB in 38.4431 seconds (256.8429 KB/sec)
18/01/26 23:57:14 INFO mapreduce.ImportJobBase: Retrieved 300024 records.
18/01/26 23:57:14 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:57:15 WARN hive.TableDefWriter: Column from_date had to be cast to a less precise type in Hive
18/01/26 23:57:15 WARN hive.TableDefWriter: Column to_date had to be cast to a less precise type in Hive
18/01/26 23:57:15 INFO hive.HiveImport: Loading uploaded data into Hive

Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 3.967 seconds
Loading data to table employees.current_dept_emp
Table employees.current_dept_emp stats: [numFiles=4, totalSize=10110817]
OK
Time taken: 0.85 seconds
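
The BaseSqoopTool warning at the start of this run is triggered by pointing --warehouse-dir at the Hive warehouse itself; since --hive-import already moves the data into the table's final location, the option can simply be dropped. A sketch of the equivalent command without it (and with -P instead of a clear-text password, as the other warning suggests):

sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P --table current_dept_emp \
  --split-by=emp_no --hive-import --create-hive-table \
  --hive-table=employees.current_dept_emp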



[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp --split-by=emp_no --hive-import --create-hive-table --hive-table=employees.current_dept_emp2 --target-dir=/user/donghua/current_dept_emp2
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 00:00:21 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 00:00:21 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 00:00:21 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 00:00:21 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 00:00:21 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 00:00:21 INFO tool.CodeGenTool: Beginning code generation
18/01/27 00:00:21 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/27 00:00:21 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/27 00:00:22 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/24b51955f91604b3504e2d409fe1d631/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 00:00:23 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/24b51955f91604b3504e2d409fe1d631/current_dept_emp.jar
18/01/27 00:00:23 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 00:00:23 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 00:00:23 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 00:00:23 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 00:00:23 INFO mapreduce.ImportJobBase: Beginning import of current_dept_emp
18/01/27 00:00:23 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 00:00:24 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 00:00:24 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 00:00:30 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 00:00:30 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`emp_no`), MAX(`emp_no`) FROM `current_dept_emp`
18/01/27 00:00:31 INFO db.IntegerSplitter: Split size: 122499; Num splits: 4 from: 10001 to: 499999
18/01/27 00:00:31 INFO mapreduce.JobSubmitter: number of splits:4
18/01/27 00:00:32 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0004
18/01/27 00:00:32 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0004
18/01/27 00:00:32 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0004/
18/01/27 00:00:32 INFO mapreduce.Job: Running job: job_1517023991003_0004
18/01/27 00:00:39 INFO mapreduce.Job: Job job_1517023991003_0004 running in uber mode : false
18/01/27 00:00:39 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 00:00:48 INFO mapreduce.Job:  map 25% reduce 0%
18/01/27 00:00:53 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 00:00:58 INFO mapreduce.Job:  map 75% reduce 0%
18/01/27 00:01:05 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 00:01:05 INFO mapreduce.Job: Job job_1517023991003_0004 completed successfully
18/01/27 00:01:05 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=698244
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=464
HDFS: Number of bytes written=10110817
HDFS: Number of read operations=16
HDFS: Number of large read operations=0
HDFS: Number of write operations=8
Job Counters 
Launched map tasks=4
Other local map tasks=4
Total time spent by all maps in occupied slots (ms)=17494
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=17494
Total vcore-milliseconds taken by all map tasks=17494
Total megabyte-milliseconds taken by all map tasks=26870784
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=464
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=296
CPU time spent (ms)=8230
Physical memory (bytes) snapshot=935788544
Virtual memory (bytes) snapshot=11149619200
Total committed heap usage (bytes)=926416896
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=10110817
18/01/27 00:01:05 INFO mapreduce.ImportJobBase: Transferred 9.6424 MB in 40.7122 seconds (242.5281 KB/sec)
18/01/27 00:01:05 INFO mapreduce.ImportJobBase: Retrieved 300024 records.
18/01/27 00:01:05 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/27 00:01:05 WARN hive.TableDefWriter: Column from_date had to be cast to a less precise type in Hive
18/01/27 00:01:05 WARN hive.TableDefWriter: Column to_date had to be cast to a less precise type in Hive
18/01/27 00:01:05 INFO hive.HiveImport: Loading uploaded data into Hive

Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 2.277 seconds
Loading data to table employees.current_dept_emp2
Table employees.current_dept_emp2 stats: [numFiles=4, totalSize=10110817]
OK
Time taken: 0.638 seconds


0: jdbc:hive2://localhost:10000/default> use employees;
INFO  : Compiling command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e): use employees
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e); Time taken: 0.007 seconds
INFO  : Executing command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e): use employees
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e); Time taken: 0.021 seconds
INFO  : OK
No rows affected (0.048 seconds)

0: jdbc:hive2://localhost:10000/default> !tables
+------------+--------------+--------------------+-------------+-------------------------------------------+--+
| TABLE_CAT  | TABLE_SCHEM  |     TABLE_NAME     | TABLE_TYPE  |                  REMARKS                  |
+------------+--------------+--------------------+-------------+-------------------------------------------+--+
|            | employees    | current_dept_emp   | TABLE       | Imported by sqoop on 2018/01/26 23:57:15  |
|            | employees    | current_dept_emp2  | TABLE       | Imported by sqoop on 2018/01/27 00:01:05  |
+------------+--------------+--------------------+-------------+-------------------------------------------+--+

0: jdbc:hive2://localhost:10000/default> !set maxcolumnwidth 200

0: jdbc:hive2://localhost:10000/default> show create table employees.current_dept_emp;
INFO  : Compiling command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590): show create table employees.current_dept_emp
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from deserializer)], properties:null)
INFO  : Completed compiling command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590); Time taken: 0.03 seconds
INFO  : Executing command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590): show create table employees.current_dept_emp
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590); Time taken: 0.009 seconds
INFO  : OK
+----------------------------------------------------------------------------------------+--+
|                                     createtab_stmt                                     |
+----------------------------------------------------------------------------------------+--+
| CREATE TABLE `employees.current_dept_emp`(                                             |
|   `emp_no` int,                                                                        |
|   `dept_no` string,                                                                    |
|   `from_date` string,                                                                  |
|   `to_date` string)                                                                    |
| COMMENT 'Imported by sqoop on 2018/01/26 23:57:15'                                    |
| ROW FORMAT SERDE                                                                       |
|   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'                                |
| WITH SERDEPROPERTIES (                                                                 |
|   'field.delim'='\u0001',                                                              |
|   'line.delim'='\n',                                                                   |
|   'serialization.format'='\u0001')                                                     |
| STORED AS INPUTFORMAT                                                                  |
|   'org.apache.hadoop.mapred.TextInputFormat'                                          |
| OUTPUTFORMAT                                                                           |
|   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'                        |
| LOCATION                                                                               |
|   'hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/employees.db/current_dept_emp' |
| TBLPROPERTIES (                                                                        |
|   'COLUMN_STATS_ACCURATE'='true',                                                      |
|   'numFiles'='4',                                                                      |
|   'totalSize'='10110817',                                                              |
|   'transient_lastDdlTime'='1517029041')                                                |
+----------------------------------------------------------------------------------------+--+
23 rows selected (0.087 seconds)

0: jdbc:hive2://localhost:10000/default> show create table employees.current_dept_emp2;
INFO  : Compiling command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78): show create table employees.current_dept_emp2
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from deserializer)], properties:null)
INFO  : Completed compiling command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78); Time taken: 0.027 seconds
INFO  : Executing command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78): show create table employees.current_dept_emp2
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78); Time taken: 0.013 seconds
INFO  : OK
+-----------------------------------------------------------------------------------------+--+
|                                     createtab_stmt                                      |
+-----------------------------------------------------------------------------------------+--+
| CREATE TABLE `employees.current_dept_emp2`(                                             |
|   `emp_no` int,                                                                         |
|   `dept_no` string,                                                                     |
|   `from_date` string,                                                                   |
|   `to_date` string)                                                                     |
| COMMENT 'Imported by sqoop on 2018/01/27 00:01:05'                                     |
| ROW FORMAT SERDE                                                                        |
|   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'                                 |
| WITH SERDEPROPERTIES (                                                                  |
|   'field.delim'='\u0001',                                                               |
|   'line.delim'='\n',                                                                    |
|   'serialization.format'='\u0001')                                                      |
| STORED AS INPUTFORMAT                                                                   |
|   'org.apache.hadoop.mapred.TextInputFormat'                                           |
| OUTPUTFORMAT                                                                            |
|   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'                         |
| LOCATION                                                                                |
|   'hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/employees.db/current_dept_emp2' |
| TBLPROPERTIES (                                                                         |
|   'COLUMN_STATS_ACCURATE'='true',                                                       |
|   'numFiles'='4',                                                                       |
|   'totalSize'='10110817',                                                               |
|   'transient_lastDdlTime'='1517029269')                                                 |
+-----------------------------------------------------------------------------------------+--+
23 rows selected (0.079 seconds)
0: jdbc:hive2://localhost:10000/default> 


[root@cdh-vm ~]# hdfs dfs -ls /user//hive/warehouse
Found 2 items
drwxrwxrwt   - donghua   hive          0 2018-01-27 00:01 /user/hive/warehouse/employees.db
drwxrwxrwt   - donghua   hive          0 2018-01-27 00:38 /user/hive/warehouse/test.db
[hdfs@cdh-vm ~]$ hdfs dfs -ls /user//hive/warehouse/employees.db
Found 2 items
drwxrwxrwt   - donghua hive          0 2018-01-26 23:57 /user/hive/warehouse/employees.db/current_dept_emp
drwxrwxrwt   - donghua hive          0 2018-01-27 00:01 /user/hive/warehouse/employees.db/current_dept_emp2



0: jdbc:hive2://localhost:10000/default> select count(*) from employees.current_dept_emp;
INFO  : Compiling command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a): select count(*) from employees.current_dept_emp
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c0, type:bigint, comment:null)], properties:null)
INFO  : Completed compiling command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a); Time taken: 0.065 seconds
INFO  : Executing command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a): select count(*) from employees.current_dept_emp
INFO  : Query ID = hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a
INFO  : Total jobs = 1
INFO  : Launching Job 1 out of 1
INFO  : Starting task [Stage-1:MAPRED] in serial mode
INFO  : Number of reduce tasks determined at compile time: 1
INFO  : In order to change the average load for a reducer (in bytes):
INFO  :   set hive.exec.reducers.bytes.per.reducer=
INFO  : In order to limit the maximum number of reducers:
INFO  :   set hive.exec.reducers.max=
INFO  : In order to set a constant number of reducers:
INFO  :   set mapreduce.job.reduces=
INFO  : number of splits:1
INFO  : Submitting tokens for job: job_1517023991003_0007
INFO  : The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0007/
INFO  : Starting Job = job_1517023991003_0007, Tracking URL = http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0007/
INFO  : Kill Command = /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/lib/hadoop/bin/hadoop job  -kill job_1517023991003_0007
INFO  : Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
INFO  : 2018-01-27 00:37:50,690 Stage-1 map = 0%,  reduce = 0%
INFO  : 2018-01-27 00:37:58,188 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.83 sec
INFO  : 2018-01-27 00:38:05,606 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 3.79 sec
INFO  : MapReduce Total cumulative CPU time: 3 seconds 790 msec
INFO  : Ended Job = job_1517023991003_0007
INFO  : MapReduce Jobs Launched: 
INFO  : Stage-Stage-1: Map: 1  Reduce: 1   Cumulative CPU: 3.79 sec   HDFS Read: 10118840 HDFS Write: 7 SUCCESS
INFO  : Total MapReduce CPU Time Spent: 3 seconds 790 msec
INFO  : Completed executing command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a); Time taken: 23.26 seconds
INFO  : OK
+---------+--+
|   _c0   |
+---------+--+
| 300024  |
+---------+--+
1 row selected (23.371 seconds)
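
As a cross-check, the same count can be run against the MariaDB source; it should match the 300024 rows reported by the Sqoop import:

-- run in the source employees database
select count(*) from current_dept_emp;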


Use Sqoop to perform an incremental import (--check-column=id --incremental=append --last-value=)


[donghua@cdh-vm ~]$ mysql -u employee_user -ppassword -D employees

MariaDB [employees]> create table t1 (id int primary key, c1 varchar(10));

MariaDB [employees]> insert into t1 values(1,'a'),(2,'b');

MariaDB [employees]> select * from t1;
+----+------+
| id | c1   |
+----+------+
|  1 | a    |
|  2 | b    |
+----+------+
2 rows in set (0.00 sec)

[donghua@cdh-vm ~]$ beeline -u jdbc:hive2://localhost:10000/default -n donghua --silent=true
0: jdbc:hive2://localhost:10000/default> create table employees.t1(id int, c1 varchar(10));


[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
>  --username employee_user --password password --table t1 \
>  --split-by=id --hive-import --hive-table=employees.t1 \
>  --warehouse-dir=/user/hive/warehouse \
>  --check-column=id --incremental=append --last-value=0
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 03:54:38 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 03:54:38 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 03:54:38 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 03:54:38 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 03:54:38 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 03:54:38 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 03:54:38 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 03:54:38 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 03:54:38 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 03:54:38 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 03:54:38 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 03:54:38 INFO tool.CodeGenTool: Beginning code generation
18/01/27 03:54:38 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:54:38 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:54:38 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/1941b9efeafd888916e872561fa71b1d/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 03:54:40 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/1941b9efeafd888916e872561fa71b1d/t1.jar
18/01/27 03:54:41 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 03:54:41 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 03:54:41 INFO tool.ImportTool: Lower bound value: 0
18/01/27 03:54:41 INFO tool.ImportTool: Upper bound value: 2
18/01/27 03:54:41 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 03:54:41 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 03:54:41 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 03:54:41 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 03:54:41 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 03:54:41 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 03:54:41 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 03:54:41 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 03:54:46 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 03:54:46 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 0 AND `id` <= 2 )
18/01/27 03:54:46 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 1 to: 2
18/01/27 03:54:46 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 03:54:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0009
18/01/27 03:54:47 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0009
18/01/27 03:54:47 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0009/
18/01/27 03:54:47 INFO mapreduce.Job: Running job: job_1517023991003_0009
18/01/27 03:54:54 INFO mapreduce.Job: Job job_1517023991003_0009 running in uber mode : false
18/01/27 03:54:54 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 03:55:02 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 03:55:06 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 03:55:07 INFO mapreduce.Job: Job job_1517023991003_0009 completed successfully
18/01/27 03:55:07 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=350308
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters 
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=7843
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=7843
Total vcore-milliseconds taken by all map tasks=7843
Total megabyte-milliseconds taken by all map tasks=12046848
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=110
CPU time spent (ms)=1970
Physical memory (bytes) snapshot=413765632
Virtual memory (bytes) snapshot=5572857856
Total committed heap usage (bytes)=402653184
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=8
18/01/27 03:55:07 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 26.2002 seconds (0.3053 bytes/sec)
18/01/27 03:55:07 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 03:55:07 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 03:55:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:55:07 INFO hive.HiveImport: Loading uploaded data into Hive

Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 2.037 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=2, totalSize=8]
OK
Time taken: 0.646 seconds

0: jdbc:hive2://localhost:10000/default> select * from employees.t1;
+--------+--------+--+
| t1.id  | t1.c1  |
+--------+--------+--+
| 1      | a      |
| 2      | b      |
+--------+--------+--+


MariaDB [employees]> insert into t1 values(3,'a'),(4,'b');
Query OK, 2 rows affected (0.00 sec)
Records: 2  Duplicates: 0  Warnings: 0


[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees  --username employee_user --password password --table t1  --split-by=id --hive-import --hive-table=employees.t1  --warehouse-dir=/user/hive/warehouse  --check-column=id --incremental=append --last-value=2
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:11:31 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 04:11:31 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 04:11:31 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 04:11:31 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 04:11:31 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:11:31 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:11:31 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:11:31 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:11:31 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:11:31 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 04:11:31 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 04:11:31 INFO tool.CodeGenTool: Beginning code generation
18/01/27 04:11:31 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:31 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:31 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/80c2f1f6c1f1b6c4b9fca928aa6353a8/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 04:11:33 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/80c2f1f6c1f1b6c4b9fca928aa6353a8/t1.jar
18/01/27 04:11:34 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 04:11:34 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 04:11:34 INFO tool.ImportTool: Lower bound value: 2
18/01/27 04:11:34 INFO tool.ImportTool: Upper bound value: 4
18/01/27 04:11:34 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 04:11:34 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 04:11:34 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 04:11:34 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 04:11:34 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 04:11:34 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 04:11:34 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 04:11:34 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 04:11:38 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 04:11:38 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 2 AND `id` <= 4 )
18/01/27 04:11:38 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 3 to: 4
18/01/27 04:11:38 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 04:11:38 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0012
18/01/27 04:11:38 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0012
18/01/27 04:11:38 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0012/
18/01/27 04:11:38 INFO mapreduce.Job: Running job: job_1517023991003_0012
18/01/27 04:11:45 INFO mapreduce.Job: Job job_1517023991003_0012 running in uber mode : false
18/01/27 04:11:45 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 04:11:51 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 04:11:57 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 04:11:57 INFO mapreduce.Job: Job job_1517023991003_0012 completed successfully
18/01/27 04:11:57 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=350308
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters 
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=7531
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=7531
Total vcore-milliseconds taken by all map tasks=7531
Total megabyte-milliseconds taken by all map tasks=11567616
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=114
CPU time spent (ms)=1800
Physical memory (bytes) snapshot=403120128
Virtual memory (bytes) snapshot=5573816320
Total committed heap usage (bytes)=359137280
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=8
18/01/27 04:11:57 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 23.359 seconds (0.3425 bytes/sec)
18/01/27 04:11:57 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 04:11:57 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 04:11:57 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:57 INFO hive.HiveImport: Loading uploaded data into Hive

Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 1.853 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=4, numRows=0, totalSize=16, rawDataSize=0]
OK
Time taken: 0.603 seconds

0: jdbc:hive2://localhost:10000/default> select * from employees.t1;
+--------+--------+--+
| t1.id  | t1.c1  |
+--------+--------+--+
| 1      | a      |
| 3      | a      |
| 2      | b      |
| 4      | b      |
+--------+--------+--+
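
The rows come back in file order rather than id order because the table is now backed by four HDFS part files (numFiles=4 above); add an ORDER BY when a deterministic order is needed:

select * from employees.t1 order by id;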

[donghua@cdh-vm ~]$ sqoop job --create emp_t1_incr  -- import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees  --username employee_user --password password --table t1  --split-by=id --hive-import --hive-table=employees.t1  --warehouse-dir=/user/hive/warehouse  --check-column=id --incremental=append --last-value=4
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:21:32 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 04:21:32 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 04:21:32 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 04:21:32 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 04:21:32 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:21:32 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:21:32 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:21:32 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:21:32 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:21:32 WARN tool.BaseSqoopTool: case that you will detect any issues.

[donghua@cdh-vm ~]$ sqoop job --list
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:30:13 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Available jobs:
  emp_t1_incr

[donghua@cdh-vm ~]$ sqoop job --show emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:35:40 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password: 
Job: emp_t1_incr
Tool: import
Options:
----------------------------
verbose = false
hcatalog.drop.and.create.table = false
incremental.last.value = 4
db.connect.string = jdbc:mysql://cdh-vm.dbaglobe.com/employees
codegen.output.delimiters.escape = 0
codegen.output.delimiters.enclose.required = false
codegen.input.delimiters.field = 0
mainframe.input.dataset.type = p
split.limit = null
hbase.create.table = false
db.require.password = true
hdfs.append.dir = true
db.table = t1
codegen.input.delimiters.escape = 0
accumulo.create.table = false
import.fetch.size = null
codegen.input.delimiters.enclose.required = false
db.username = employee_user
reset.onemapper = false
codegen.output.delimiters.record = 10
import.max.inline.lob.size = 16777216
sqoop.throwOnError = false
hbase.bulk.load.enabled = false
hcatalog.create.table = false
db.clear.staging.table = false
incremental.col = id
codegen.input.delimiters.record = 0
hdfs.warehouse.dir = /user/hive/warehouse
enable.compression = false
hive.overwrite.table = false
hive.import = true
codegen.input.delimiters.enclose = 0
hive.table.name = employees.t1
accumulo.batch.size = 10240000
hive.drop.delims = false
customtool.options.jsonmap = {}
codegen.output.delimiters.enclose = 0
hdfs.delete-target.dir = false
codegen.output.dir = .
codegen.auto.compile.dir = true
relaxed.isolation = false
mapreduce.num.mappers = 4
accumulo.max.latency = 5000
import.direct.split.size = 0
sqlconnection.metadata.transaction.isolation.level = 2
codegen.output.delimiters.field = 1
export.new.update = UpdateOnly
incremental.mode = AppendRows
hdfs.file.format = TextFile
sqoop.oracle.escaping.disabled = true
codegen.compile.dir = /tmp/sqoop-donghua/compile/e7212eb92686a1486fa1cd44a6c9afc7
direct.import = false
temporary.dirRoot = _sqoop
db.split.column = id
hive.fail.table.exists = false
db.batch = false


[donghua@cdh-vm ~]$ sqoop job --exec emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:38:01 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password: 
18/01/27 04:38:06 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:38:06 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:38:06 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:38:06 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:38:06 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:38:06 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 04:38:06 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 04:38:06 INFO tool.CodeGenTool: Beginning code generation
18/01/27 04:38:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:07 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/e3c397f1a5469f870ba19e95b80a66a2/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 04:38:08 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/e3c397f1a5469f870ba19e95b80a66a2/t1.jar
18/01/27 04:38:09 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 04:38:09 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 04:38:09 INFO tool.ImportTool: Lower bound value: 4
18/01/27 04:38:09 INFO tool.ImportTool: Upper bound value: 6
18/01/27 04:38:09 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 04:38:09 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 04:38:09 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 04:38:09 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 04:38:09 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 04:38:09 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 04:38:09 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 04:38:09 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 04:38:12 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 04:38:12 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 4 AND `id` <= 6 )
18/01/27 04:38:12 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 5 to: 6
18/01/27 04:38:12 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 04:38:12 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0013
18/01/27 04:38:12 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0013
18/01/27 04:38:12 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0013/
18/01/27 04:38:12 INFO mapreduce.Job: Running job: job_1517023991003_0013
18/01/27 04:38:20 INFO mapreduce.Job: Job job_1517023991003_0013 running in uber mode : false
18/01/27 04:38:20 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 04:38:26 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 04:38:31 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 04:38:32 INFO mapreduce.Job: Job job_1517023991003_0013 completed successfully
18/01/27 04:38:33 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=351166
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters 
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=8071
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=8071
Total vcore-milliseconds taken by all map tasks=8071
Total megabyte-milliseconds taken by all map tasks=12397056
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=136
CPU time spent (ms)=1820
Physical memory (bytes) snapshot=404738048
Virtual memory (bytes) snapshot=5573140480
Total committed heap usage (bytes)=354942976
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=8
18/01/27 04:38:33 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 23.4364 seconds (0.3413 bytes/sec)
18/01/27 04:38:33 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 04:38:33 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 04:38:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:33 INFO hive.HiveImport: Loading uploaded data into Hive

Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 1.95 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=6, numRows=0, totalSize=24, rawDataSize=0]
OK
Time taken: 0.664 seconds

[donghua@cdh-vm ~]$ sqoop job --show emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:38:50 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password: 
Job: emp_t1_incr
Tool: import
Options:
----------------------------
verbose = false
hcatalog.drop.and.create.table = false
incremental.last.value = 6
db.connect.string = jdbc:mysql://cdh-vm.dbaglobe.com/employees
codegen.output.delimiters.escape = 0
codegen.output.delimiters.enclose.required = false
codegen.input.delimiters.field = 0
mainframe.input.dataset.type = p
split.limit = null
hbase.create.table = false
db.require.password = true
hdfs.append.dir = true
db.table = t1
codegen.input.delimiters.escape = 0
accumulo.create.table = false
import.fetch.size = null
codegen.input.delimiters.enclose.required = false
db.username = employee_user
reset.onemapper = false
codegen.output.delimiters.record = 10
import.max.inline.lob.size = 16777216
sqoop.throwOnError = false
hbase.bulk.load.enabled = false
hcatalog.create.table = false
db.clear.staging.table = false
incremental.col = id
codegen.input.delimiters.record = 0
hdfs.warehouse.dir = /user/hive/warehouse
enable.compression = false
hive.overwrite.table = false
hive.import = true
codegen.input.delimiters.enclose = 0
hive.table.name = employees.t1
accumulo.batch.size = 10240000
hive.drop.delims = false
customtool.options.jsonmap = {}
codegen.output.delimiters.enclose = 0
hdfs.delete-target.dir = false
codegen.output.dir = .
codegen.auto.compile.dir = true
relaxed.isolation = false
mapreduce.num.mappers = 4
accumulo.max.latency = 5000
import.direct.split.size = 0
sqlconnection.metadata.transaction.isolation.level = 2
codegen.output.delimiters.field = 1
export.new.update = UpdateOnly
incremental.mode = AppendRows
hdfs.file.format = TextFile
sqoop.oracle.escaping.disabled = true
codegen.compile.dir = /tmp/sqoop-donghua/compile/39496d079794ae53a008a2da9cd2ac4a
direct.import = false
temporary.dirRoot = _sqoop
db.split.column = id
hive.fail.table.exists = false
db.batch = false
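
The warnings above point out that passing --password on the command line is insecure. A minimal sketch of the same job definition using --password-file instead (the job name emp_t1_incr_pwfile and the HDFS path /user/donghua/.mysql_pwd are assumptions for illustration):

# Store the password in a permission-restricted HDFS file (echo -n avoids a trailing newline)
echo -n "password" > .mysql_pwd
hdfs dfs -put .mysql_pwd /user/donghua/.mysql_pwd
hdfs dfs -chmod 400 /user/donghua/.mysql_pwd

# Same incremental-append job as above, minus the clear-text --password
sqoop job --create emp_t1_incr_pwfile -- import \
  --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user \
  --password-file /user/donghua/.mysql_pwd \
  --table t1 --split-by=id \
  --hive-import --hive-table=employees.t1 \
  --check-column=id --incremental=append --last-value=4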

Use Sqoop to perform incremental data loading (--incremental=lastmodified for timestamp column)

$
0
0
[donghua@cdh-vm ~]$ hdfs dfs -cat /user/donghua/t2/part-m-00000
1,2018-01-27 04:50:07.0
2,2018-01-27 04:50:18.0
[donghua@cdh-vm ~]$ 

MariaDB [employees]> insert into t2 values(3,current_timestamp());
Query OK, 1 row affected (0.01 sec)

MariaDB [employees]> update t2 set last_updated_at=current_timestamp() where id=2;
Query OK, 1 row affected (0.07 sec)
Rows matched: 1  Changed: 1  Warnings: 0

MariaDB [employees]> select * from t2;
+----+---------------------+
| id | last_updated_at     |
+----+---------------------+
|  1 | 2018-01-27 04:50:07 |
|  2 | 2018-01-27 05:10:14 |
|  3 | 2018-01-27 05:09:45 |
+----+---------------------+
3 rows in set (0.00 sec)

MariaDB [employees]> 


[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees  --username employee_user --password password --table t2  -m 1 --target-dir=/user/donghua/t2 --check-column=last_updated_at --incremental=lastmodified --last-value='2018-01-27 05:06:03.0'
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:11:59 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:11:59 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:12:00 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:12:00 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:12:00 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:00 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:00 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/631be22fe0124698ede97beba0c8288e/t2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:12:01 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/631be22fe0124698ede97beba0c8288e/t2.jar
18/01/27 05:12:02 ERROR tool.ImportTool: Import failed: --merge-key or --append is required when using --incremental lastmodified and the output directory exists.

[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees  --username employee_user --password password --table t2  -m 1 --target-dir=/user/donghua/t2 --check-column=last_updated_at --incremental=lastmodified --last-value='2018-01-27 05:06:03.0' --merge-key=id
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:12:40 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:12:40 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:12:40 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:12:40 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:12:41 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:41 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:41 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/2e4f90897221b505b822c323c3cb2b41/t2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:12:42 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/2e4f90897221b505b822c323c3cb2b41/t2.jar
18/01/27 05:12:43 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:43 INFO tool.ImportTool: Incremental import based on column `last_updated_at`
18/01/27 05:12:43 INFO tool.ImportTool: Lower bound value: '2018-01-27 05:06:03.0'
18/01/27 05:12:43 INFO tool.ImportTool: Upper bound value: '2018-01-27 05:12:43.0'
18/01/27 05:12:43 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 05:12:43 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 05:12:43 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 05:12:43 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 05:12:43 INFO mapreduce.ImportJobBase: Beginning import of t2
18/01/27 05:12:43 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 05:12:43 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 05:12:43 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:12:47 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 05:12:47 INFO mapreduce.JobSubmitter: number of splits:1
18/01/27 05:12:48 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0016
18/01/27 05:12:48 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0016
18/01/27 05:12:48 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0016/
18/01/27 05:12:48 INFO mapreduce.Job: Running job: job_1517023991003_0016
18/01/27 05:12:55 INFO mapreduce.Job: Job job_1517023991003_0016 running in uber mode : false
18/01/27 05:12:55 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 05:13:01 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 05:13:02 INFO mapreduce.Job: Job job_1517023991003_0016 completed successfully
18/01/27 05:13:02 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=175177
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=48
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters 
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=4073
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=4073
Total vcore-milliseconds taken by all map tasks=4073
Total megabyte-milliseconds taken by all map tasks=6256128
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=61
CPU time spent (ms)=920
Physical memory (bytes) snapshot=196935680
Virtual memory (bytes) snapshot=2785828864
Total committed heap usage (bytes)=155713536
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=48
18/01/27 05:13:02 INFO mapreduce.ImportJobBase: Transferred 48 bytes in 19.1241 seconds (2.5099 bytes/sec)
18/01/27 05:13:02 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 05:13:02 INFO tool.ImportTool: Final destination exists, will run merge job.
18/01/27 05:13:02 INFO Configuration.deprecation: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
18/01/27 05:13:02 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:13:07 INFO input.FileInputFormat: Total input paths to process : 2
18/01/27 05:13:07 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 05:13:07 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0017
18/01/27 05:13:08 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0017
18/01/27 05:13:08 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0017/
18/01/27 05:13:08 INFO mapreduce.Job: Running job: job_1517023991003_0017
18/01/27 05:13:15 INFO mapreduce.Job: Job job_1517023991003_0017 running in uber mode : false
18/01/27 05:13:15 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 05:13:20 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 05:13:24 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 05:13:31 INFO mapreduce.Job:  map 100% reduce 100%
18/01/27 05:13:31 INFO mapreduce.Job: Job job_1517023991003_0017 completed successfully
18/01/27 05:13:31 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=90
FILE: Number of bytes written=526653
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=386
HDFS: Number of bytes written=72
HDFS: Number of read operations=9
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters 
Launched map tasks=2
Launched reduce tasks=1
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=6496
Total time spent by all reduces in occupied slots (ms)=3317
Total time spent by all map tasks (ms)=6496
Total time spent by all reduce tasks (ms)=3317
Total vcore-milliseconds taken by all map tasks=6496
Total vcore-milliseconds taken by all reduce tasks=3317
Total megabyte-milliseconds taken by all map tasks=9977856
Total megabyte-milliseconds taken by all reduce tasks=5094912
Map-Reduce Framework
Map input records=4
Map output records=4
Map output bytes=96
Map output materialized bytes=122
Input split bytes=290
Combine input records=0
Combine output records=0
Reduce input groups=3
Reduce shuffle bytes=122
Reduce input records=4
Reduce output records=3
Spilled Records=8
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=211
CPU time spent (ms)=1900
Physical memory (bytes) snapshot=1147371520
Virtual memory (bytes) snapshot=8375828480
Total committed heap usage (bytes)=1154482176
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters 
Bytes Read=96
File Output Format Counters 
Bytes Written=72
18/01/27 05:13:31 INFO tool.ImportTool: Incremental import complete! To run another incremental import of all data following this import, supply the following arguments:
18/01/27 05:13:31 INFO tool.ImportTool:  --incremental lastmodified
18/01/27 05:13:31 INFO tool.ImportTool:   --check-column last_updated_at
18/01/27 05:13:31 INFO tool.ImportTool:   --last-value 2018-01-27 05:12:43.0
18/01/27 05:13:31 INFO tool.ImportTool: (Consider saving this with 'sqoop job --create')
[donghua@cdh-vm ~]$ hdfs dfs -ls /user/donghua/t2/
Found 2 items
-rw-r--r--   1 donghua supergroup          0 2018-01-27 05:13 /user/donghua/t2/_SUCCESS
-rw-r--r--   1 donghua supergroup         72 2018-01-27 05:13 /user/donghua/t2/part-r-00000
[donghua@cdh-vm ~]$ hdfs dfs -cat /user/donghua/t2/part-r-00000
1,2018-01-27 04:50:07.0
2,2018-01-27 05:10:14.0
3,2018-01-27 05:09:45.0
[donghua@cdh-vm ~]$ 
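
As the ImportTool output above suggests, the lastmodified import can be saved with 'sqoop job --create' so that Sqoop tracks the new --last-value between runs automatically. A minimal sketch (the job name emp_t2_lastmod is an assumption; the password file reuses the one assumed in the earlier sketch):

sqoop job --create emp_t2_lastmod -- import \
  --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user \
  --password-file /user/donghua/.mysql_pwd \
  --table t2 -m 1 \
  --target-dir=/user/donghua/t2 \
  --check-column=last_updated_at --incremental=lastmodified \
  --merge-key=id \
  --last-value='2018-01-27 05:12:43.0'

# Each execution runs the import plus the merge job, then updates
# incremental.last.value in the saved job definition
sqoop job --exec emp_t2_lastmod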

Use sqoop export to move data from HDFS into MySQL

$
0
0

MariaDB [employees]> create table current_dept_emp2 as  select * from current_dept_emp where 1=2;
Query OK, 0 rows affected (0.05 sec)
Records: 0  Duplicates: 0  Warnings: 0

[donghua@cdh-vm ~]$ sqoop export --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp2 --export-dir /user/donghua/current_dept_emp
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:43:54 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:43:54 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:43:55 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:43:55 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:43:55 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp2` AS t LIMIT 1
18/01/27 05:43:55 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp2` AS t LIMIT 1
18/01/27 05:43:55 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/4eb832477301808137f8d255765ba2ca/current_dept_emp2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:43:56 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/4eb832477301808137f8d255765ba2ca/current_dept_emp2.jar
18/01/27 05:43:56 INFO mapreduce.ExportJobBase: Beginning export of current_dept_emp2
18/01/27 05:43:57 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 05:43:58 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:44:00 INFO input.FileInputFormat: Total input paths to process : 1
18/01/27 05:44:00 INFO input.FileInputFormat: Total input paths to process : 1
18/01/27 05:44:00 INFO mapreduce.JobSubmitter: number of splits:4
18/01/27 05:44:01 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0018
18/01/27 05:44:01 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0018
18/01/27 05:44:01 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0018/
18/01/27 05:44:01 INFO mapreduce.Job: Running job: job_1517023991003_0018
18/01/27 05:44:08 INFO mapreduce.Job: Job job_1517023991003_0018 running in uber mode : false
18/01/27 05:44:08 INFO mapreduce.Job:  map 0% reduce 0%
18/01/27 05:44:16 INFO mapreduce.Job:  map 25% reduce 0%
18/01/27 05:44:22 INFO mapreduce.Job:  map 50% reduce 0%
18/01/27 05:44:28 INFO mapreduce.Job:  map 75% reduce 0%
18/01/27 05:44:34 INFO mapreduce.Job:  map 100% reduce 0%
18/01/27 05:44:35 INFO mapreduce.Job: Job job_1517023991003_0018 completed successfully
18/01/27 05:44:35 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=695328
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=10241715
HDFS: Number of bytes written=0
HDFS: Number of read operations=19
HDFS: Number of large read operations=0
HDFS: Number of write operations=0
Job Counters 
Launched map tasks=4
Data-local map tasks=4
Total time spent by all maps in occupied slots (ms)=20479
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=20479
Total vcore-milliseconds taken by all map tasks=20479
Total megabyte-milliseconds taken by all map tasks=31455744
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=711
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=332
CPU time spent (ms)=15020
Physical memory (bytes) snapshot=1057984512
Virtual memory (bytes) snapshot=11192446976
Total committed heap usage (bytes)=862453760
File Input Format Counters 
Bytes Read=0
File Output Format Counters 
Bytes Written=0
18/01/27 05:44:35 INFO mapreduce.ExportJobBase: Transferred 9.7673 MB in 37.4601 seconds (266.9952 KB/sec)
18/01/27 05:44:35 INFO mapreduce.ExportJobBase: Exported 300024 records.

MariaDB [employees]> select count(*) from current_dept_emp2;
+----------+
| count(*) |
+----------+
|   300024 |
+----------+
1 row in set (0.09 sec)
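
The export above does plain inserts into the empty copy table; re-running it would simply load the rows again. If the target table carried a primary or unique key, a variant with --update-key (update mode defaults to updateonly) would turn the export into UPDATE statements against matching rows. This is only a sketch; using emp_no as the key column is an assumption:

# Sketch: re-runnable export that updates rows keyed on emp_no instead of inserting
sqoop export \
  --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P \
  --table current_dept_emp2 \
  --export-dir /user/donghua/current_dept_emp \
  --update-key emp_no \
  --update-mode updateonly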

Convert Excel into CSV using pandas

$
0
0
Donghuas-MacBook-Air:Downloads donghua$ python
Python 3.6.3 |Anaconda, Inc.| (default, Oct  6 2017, 12:04:38) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>> import numpy as np
>>> df = pd.read_excel("/Users/donghua/Downloads/LN University.xls",sheet_name="Sheet1",header=None, skiprows=3)
>>> df.head(1)
   0       1             2            3       4    5       6   7   \
0   1  鞍山师范学院  201310169001  花楸果实中花青素的提取  创新训练项目  侯文锋  110604   4   

                                          8   9   10     11    12     13   14  \
0  杨晓龙 110607 \n王博  110505 \n陈中意 110629       辛广  教授  15000  5000  10000  550   

                                                  15  
0  本项目以花楸为原材料,通过用表面活性剂结合酸化的常规提取剂辅助超声波法提取花楸果实中花青素,...  
>>> df.to_csv("/Users/donghua/Downloads/LN_University_20180125.csv",sep='\t',header=False, encoding='utf-8') 
>>> exit()

Donghuas-MacBook-Air:Downloads donghua$ 

warning: setlocale: LC_CTYPE: cannot change locale (UTF-8)

$
0
0
Donghuas-MacBook-Air:~ donghua$ ssh 192.168.31.5
Last login: Fri Feb  2 07:19:47 2018 from 192.168.31.177
-bash: warning: setlocale: LC_CTYPE: cannot change locale (UTF-8): No such file or directory


[donghua@localhost ~]$ vi /etc/environment 

# Add following 2 lines
LANG=en_US.utf-8
LC_ALL=en_US.utf-8


Donghuas-MacBook-Air:~ donghua$ ssh 192.168.31.5
Last login: Fri Feb  2 07:25:15 2018 from 192.168.31.177
[donghua@localhost ~]$ 



Use Python scripts to manage Cloudera CDH services

$
0
0


# pip install cm-api
# API Doc: https://cloudera.github.io/cm_api/docs/python-client/

[donghua@cdh-vm scripts]$ ./start_cluster.py 
: cluster; version: CDH5

Name: hdfs             Before: STOPPED       Result: True
Name: zookeeper        Before: STOPPED       Result: True
Name: yarn             Before: STOPPED       Result: True
Name: spark_on_yarn    Before: STOPPED       Result: True
Name: hive             Before: STOPPED       Result: True
Name: impala           Before: STOPPED       Result: True
Name: hue              Before: STOPPED       Result: True

[donghua@cdh-vm scripts]$ ./status_cluster.py 
: cluster; version: CDH5

Service Name     Service State    Health Summary
---------------  ---------------  ---------------
hdfs             STARTED          GOOD
yarn             STARTED          GOOD
spark_on_yarn    STARTED          GOOD
hive             STARTED          GOOD
zookeeper        STARTED          GOOD
impala           STARTED          GOOD
oozie            STOPPED          DISABLED
hue              STARTED          GOOD
sqoop_client     NA               GOOD
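
The start_cluster.py and status_cluster.py scripts themselves are not shown here. The same information is also reachable without the Python client, straight from the Cloudera Manager REST API; a minimal curl sketch (CM on port 7180, admin/admin credentials, cluster name "cluster" and API version v10 are all assumptions):

# List every service with its serviceState and healthSummary
curl -s -u admin:admin \
  'http://cdh-vm.dbaglobe.com:7180/api/v10/clusters/cluster/services' | python -m json.tool

# Start or stop an individual service, e.g. hdfs
curl -s -u admin:admin -X POST \
  'http://cdh-vm.dbaglobe.com:7180/api/v10/clusters/cluster/services/hdfs/commands/start'
curl -s -u admin:admin -X POST \
  'http://cdh-vm.dbaglobe.com:7180/api/v10/clusters/cluster/services/hdfs/commands/stop'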

Change timezone on Redhat EL7

$
0
0
[root@cdh-vm ~]# timedatectl 
      Local time: Sat 2018-02-03 18:51:25 EST
  Universal time: Sat 2018-02-03 23:51:25 UTC
        RTC time: Sat 2018-02-03 23:51:22
       Time zone: America/New_York (EST, -0500)
     NTP enabled: yes
NTP synchronized: yes
 RTC in local TZ: no
      DST active: no
 Last DST change: DST ended at
                  Sun 2017-11-05 01:59:59 EDT
                  Sun 2017-11-05 01:00:00 EST
 Next DST change: DST begins (the clock jumps one hour forward) at
                  Sun 2018-03-11 01:59:59 EST
                  Sun 2018-03-11 03:00:00 EDT

[root@cdh-vm ~]# timedatectl list-timezones |grep -i singapore
Asia/Singapore

[root@cdh-vm ~]# timedatectl set-timezone Asia/Singapore

[root@cdh-vm ~]# timedatectl 
      Local time: Sun 2018-02-04 07:52:04 +08
  Universal time: Sat 2018-02-03 23:52:04 UTC
        RTC time: Sat 2018-02-03 23:52:01
       Time zone: Asia/Singapore (+08, +0800)
     NTP enabled: yes
NTP synchronized: yes
 RTC in local TZ: no
      DST active: n/a

Fix "you are accessing a non-optimized Hue" message with Proxy/LB setup

$
0
0
The following warning appeared when accessing the Hue Load Balancer service through an Nginx proxy:
You are accessing a non-optimized Hue, please switch to one of the available addresses: http://cdh-vm.dbaglobe.com:8889

How to fix:
Hue -> Configuration -> Hue Service Advanced Configuration Snippet (Safety Valve) for hue_safety_valve.ini      
      
[desktop]
hue_load_balancer=http://192.168.31.5:8889,http://cdh-vm.dbaglobe.com:8889



warning: "set mapreduce.framework.name=local" terminates hiveserver2 server process

$
0
0
0: jdbc:hive2://cdh-vm:10000/employees> set mapreduce.framework.name=local;
No rows affected (0.015 seconds)
0: jdbc:hive2://cdh-vm:10000/employees> select id,count(*) num from t1 group by id order by num;
Unknown HS2 problem when communicating with Thrift server.
Error: org.apache.thrift.transport.TTransportException: java.net.SocketException: Broken pipe (Write failed) (state=08S01,code=0)

# /var/run/cloudera-scm-agent/process/175-hive-HIVESERVER2/logs/stderr.log
Job running in-process (local Hadoop)
+ ps -p 898 -c
+ grep java
+ RET=0
+ '[' 0 -eq 0 ']'
+ TARGET=898
++ date
+ echo Tue Feb 6 22:19:34 +08 2018
+ kill -9 898

SQL Server JDBC integrated authentication error

$
0
0
Connection failure. You must change the Database Settings.
  com.microsoft.sqlserver.jdbc.SQLServerException: This driver is not configured for integrated authentication. ClientConnectionId:b31236b3-c830-45c9-bdb0-8e9ecbe01476
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.terminate(SQLServerConnection.java:2400)
      at com.microsoft.sqlserver.jdbc.AuthenticationJNI.<init>(AuthenticationJNI.java:68)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.logon(SQLServerConnection.java:3132)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.access$100(SQLServerConnection.java:43)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection$LogonCommand.doExecute(SQLServerConnection.java:3123)
      at com.microsoft.sqlserver.jdbc.TDSCommand.execute(IOBuffer.java:7505)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.executeCommand(SQLServerConnection.java:2445)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.connectHelper(SQLServerConnection.java:1981)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.login(SQLServerConnection.java:1628)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.connectInternal(SQLServerConnection.java:1459)
      at com.microsoft.sqlserver.jdbc.SQLServerConnection.connect(SQLServerConnection.java:773)
      at com.microsoft.sqlserver.jdbc.SQLServerDriver.connect(SQLServerDriver.java:1168)
      at org.talend.core.model.metadata.builder.database.DriverShim.connect(DriverShim.java:41)
      at org.talend.core.model.metadata.builder.database.ExtractMetaDataUtils.connect(ExtractMetaDataUtils.java:1069)
      at org.talend.core.model.metadata.builder.database.ExtractMetaDataFromDataBase.testConnection(ExtractMetaDataFromDataBase.java:315)
      at org.talend.metadata.managment.repository.ManagerConnection.check(ManagerConnection.java:289)
      at org.talend.repository.ui.wizards.metadata.connection.database.DatabaseForm$62.runWithCancel(DatabaseForm.java:3983)
      at org.talend.repository.ui.wizards.metadata.connection.database.DatabaseForm$62.runWithCancel(DatabaseForm.java:1)
      at org.talend.repository.ui.dialog.AProgressMonitorDialogWithCancel$1.runnableWithCancel(AProgressMonitorDialogWithCancel.java:77)
      at org.talend.repository.ui.dialog.AProgressMonitorDialogWithCancel$ARunnableWithProgressCancel$1.call(AProgressMonitorDialogWithCancel.java:161)
      at java.util.concurrent.FutureTask.run(Unknown Source)
      at java.lang.Thread.run(Unknown Source)
  Caused by: java.lang.UnsatisfiedLinkError: no sqljdbc_auth in java.library.path
      at java.lang.ClassLoader.loadLibrary(Unknown Source)
      at java.lang.Runtime.loadLibrary0(Unknown Source)
      at java.lang.System.loadLibrary(Unknown Source)
      at com.microsoft.sqlserver.jdbc.AuthenticationJNI.<init>(AuthenticationJNI.java:41)
      ... 20 more


How to fix: 

Copy the sqljdbc_auth.dll that matches the OS architecture (x86 or x64) into C:\Windows or C:\Windows\System32:

PS C:\Users\Administrator> copy C:\Donghua\TOD-DI\sqljdbc_6.0\enu\auth\x64\sqljdbc_auth.dll C:\Windows\
PS C:\Users\Administrator> dir C:\Windows\sqljdbc_auth.dll


    Directory: C:\Windows


Mode                LastWriteTime         Length Name
----                -------------         ------ ----
-a----        1/17/2017  11:44 AM         310088 sqljdbc_auth.dll

How to upgrade outdated Python packages

$
0
0
[root@cdh-vm logs]# pip list --outdated --format=legacy 
argcomplete (1.8.2) - Latest: 1.9.3 [wheel]
backports.ssl-match-hostname (3.4.0.2) - Latest: 3.5.0.1 [sdist]
beautifulsoup4 (4.5.3) - Latest: 4.6.0 [wheel]
chardet (2.3.0) - Latest: 3.0.4 [wheel]
configobj (4.7.2) - Latest: 5.0.6 [sdist]
decorator (3.4.0) - Latest: 4.2.1 [wheel]
docx2txt (0.6) - Latest: 0.7 [sdist]
EbookLib (0.15) - Latest: 0.16 [sdist]
perf (0.1) - Latest: 1.5.1 [wheel]
psycopg2 (2.5.1) - Latest: 2.7.4 [wheel]
pycurl (7.19.0) - Latest: 7.43.0.1 [sdist]
pygobject (3.22.0) - Latest: 3.27.2 [sdist]
python-pptx (0.6.5) - Latest: 0.6.7 [sdist]
pyudev (0.15) - Latest: 0.21.0 [sdist]
pyxattr (0.5.1) - Latest: 0.6.0 [sdist]
setuptools (0.9.8) - Latest: 38.5.1 [wheel]
six (1.10.0) - Latest: 1.11.0 [wheel]
SpeechRecognition (3.6.3) - Latest: 3.8.1 [wheel]
urlgrabber (3.10) - Latest: 3.10.2 [sdist]
xlrd (1.0.0) - Latest: 1.1.0 [wheel]

[root@cdh-vm logs]# pip list --outdated --format=columns 
Package                      Version Latest   Type 
---------------------------- ------- -------- -----
argcomplete                  1.8.2   1.9.3    wheel
backports.ssl-match-hostname 3.4.0.2 3.5.0.1  sdist
beautifulsoup4               4.5.3   4.6.0    wheel
chardet                      2.3.0   3.0.4    wheel
configobj                    4.7.2   5.0.6    sdist
decorator                    3.4.0   4.2.1    wheel
docx2txt                     0.6     0.7      sdist
EbookLib                     0.15    0.16     sdist
perf                         0.1     1.5.1    wheel
psycopg2                     2.5.1   2.7.4    wheel
pycurl                       7.19.0  7.43.0.1 sdist
pygobject                    3.22.0  3.27.2   sdist
python-pptx                  0.6.5   0.6.7    sdist
pyudev                       0.15    0.21.0   sdist
pyxattr                      0.5.1   0.6.0    sdist
setuptools                   0.9.8   38.5.1   wheel
six                          1.10.0  1.11.0   wheel
SpeechRecognition            3.6.3   3.8.1    wheel
urlgrabber                   3.10    3.10.2   sdist
xlrd                         1.0.0   1.1.0    wheel

# Upgrade manually one by one
[root@cdh-vm logs]# pip install pycurl -U

# Upgrade all at once (high chance of rollback if some packages fail
# to upgrade)
[root@cdh-vm logs]# pip install $(pip list --outdated --format=columns |tail -n +3|cut -d" " -f1) --upgrade

# Upgrade one by one using a loop
[root@cdh-vm logs]# for i in $(pip list --outdated --format=columns |tail -n +3|cut -d" " -f1); do pip install $i --upgrade; done

pycurl: libcurl link-time ssl backend (nss) is different from compile-time ssl backend (none/other)

$
0
0
[root@cdh-vm logs]# yum update -y
There was a problem importing one of the Python modules
required to run yum. The error leading to this problem was:

   pycurl: libcurl link-time ssl backend (nss) is different from compile-time ssl backend (none/other)

Please install a package which provides this module, or
verify that the module is installed correctly.

It's possible that the above module doesn't match the
current version of Python, which is:
2.7.5 (default, Aug  4 2017, 00:39:18) 
[GCC 4.8.5 20150623 (Red Hat 4.8.5-16)]

If you cannot solve this problem yourself, please go to 
the yum faq at:
  http://yum.baseurl.org/wiki/Faq
  
How to fix:

Remove the existing pycurl installation:

[root@cdh-vm logs]# pip uninstall pycurl
Export the variable for your link-time SSL backend (nss, per the message above), then reinstall:

[root@cdh-vm logs]# export PYCURL_SSL_LIBRARY=nss
[root@cdh-vm logs]# pip install pycurl


Learning Apache Pig, Chapter 2 (O'Reilly)

$
0
0
Sample data files:
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/cities_small.txt
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/states.txt



[donghua@cdh-vm temp]$ pig -4 log4j.properties 
grunt> cities = load 'cities_small.txt' as (name:chararray,state:chararray,pop:int);
grunt> aliases;
grunt> describe cities
cities: {name: chararray,state: chararray,pop: int}
grunt> \de cities
cities: {name: chararray,state: chararray,pop: int}
grunt> ca_cities = filter cities by (state=='CA');
grunt> dump ca_cities;
grunt> \d ca_cities
grunt> illustrate;
(South Gate,CA,96640)
--------------------------------------------------------------------
| cities     | name:chararray    | state:chararray    | pop:int    | 
--------------------------------------------------------------------
|            | South Gate        | CA                 | 96640      | 
--------------------------------------------------------------------

grunt> illustrate;
(Fresno,CA,476050)
--------------------------------------------------------------------
| cities     | name:chararray    | state:chararray    | pop:int    | 
--------------------------------------------------------------------
|            | Fresno            | CA                 | 476050     | 
--------------------------------------------------------------------

grunt> ordered_cities = order cities by pop desc;

grunt> states = load 'states.txt' as (rank:int,code:chararray,fullname:chararray,date_entered:chararray,year_entered:int);

grunt> cities_join_states = join cities by state, states by code;

grunt> illustrate cities_join_states;
(Fargo,ND,93531)
(39,ND,North Dakota,02-NOV,1889)
--------------------------------------------------------------------
| cities     | name:chararray    | state:chararray    | pop:int    | 
--------------------------------------------------------------------
|            | Fargo             | ND                 | 93531      | 
|            | Fargo             | ND                 | 93531      | 
--------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------
| states     | rank:int    | code:chararray    | fullname:chararray    | date_entered:chararray    | year_entered:int    | 
--------------------------------------------------------------------------------------------------------------------------
|            | 39          | ND                | North Dakota          | 02-NOV                    | 1889                | 
|            | 39          | ND                | North Dakota          | 02-NOV                    | 1889                | 
--------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| cities_join_states     | cities::name:chararray    | cities::state:chararray    | cities::pop:int    | states::rank:int    | states::code:chararray    | states::fullname:chararray    | states::date_entered:chararray    | states::year_entered:int    | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|                        | Fargo                     | ND                         | 93531              | 39                  | ND                        | North Dakota                  | 02-NOV                            | 1889                        | 
|                        | Fargo                     | ND                         | 93531              | 39                  | ND                        | North Dakota                  | 02-NOV                            | 1889                        | 
|                        | Fargo                     | ND                         | 93531              | 39                  | ND                        | North Dakota                  | 02-NOV                            | 1889                        | 
|                        | Fargo                     | ND                         | 93531              | 39                  | ND                        | North Dakota                  | 02-NOV                            | 1889                        | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

grunt> cities_join_states_short = foreach cities_join_states generate cities::name, states::fullname;

grunt> store cities_join_states_short into 'cities_join_states_short';
grunt> fs -ls cities_join_states_short
grunt> fs -cat cities_join_states_short/part-r-00000



grunt> cities_join_states_short = foreach (join cities by state, states by code) generate cities::name, states::fullname;
grunt> city_and_state = foreach cities generate name,state,pop*1.5;

grunt> cities_by_state = group cities by state;
grunt> \de cities_by_state;
cities_by_state: {group: chararray,cities: {(name: chararray,state: chararray,pop: int)}}
grunt> illustrate cities_by_state;
(Sioux Falls,SD,154997)
-----------------------------------------------------------------------
| cities     | name:chararray     | state:chararray     | pop:int     | 
-----------------------------------------------------------------------
|            | Sioux Falls        | SD                  | 154997      | 
|            | Rapid City         | SD                  | 65491       | 
-----------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------
| cities_by_state     | group:chararray     | cities:bag{:tuple(name:chararray,state:chararray,pop:int)}                     | 
------------------------------------------------------------------------------------------------------------------------------
|                     | SD                  | {(Sioux Falls, SD, 154997), (Rapid City, SD, 65491)}                           | 
------------------------------------------------------------------------------------------------------------------------------

grunt> total_cities = foreach (group cities all) generate COUNT(cities);
grunt> \d total_cities;
(500)

grunt> cities_by_state = foreach (group cities by state) generate group, COUNT(cities);
grunt> \d cities_by_state;

grunt> cities_by_state = foreach (group cities by state parallel 3) generate group, COUNT(cities);
grunt> store cities_by_state into 'cities_by_state';
grunt> fs -ls cities_by_state
Found 4 items
-rw-r--r--   1 donghua supergroup          0 2018-02-17 22:25 cities_by_state/_SUCCESS
-rw-r--r--   1 donghua supergroup        113 2018-02-17 22:25 cities_by_state/part-r-00000
-rw-r--r--   1 donghua supergroup         82 2018-02-17 22:25 cities_by_state/part-r-00001
-rw-r--r--   1 donghua supergroup         86 2018-02-17 22:25 cities_by_state/part-r-00002


Complex data processing in SQL vs Pig

$
0
0

[donghua@cdh-vm temp]$ hcat -e "desc employees.departments"
dept_no             string                                 
dept_name           string                                 

[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_manager"
emp_no              int                                    
dept_no             string                                 
from_date           string                                 
to_date             string                                 


[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_emp"
emp_no              int                                    
dept_no             string                                 
from_date           string                                 
to_date             string                                 

[donghua@cdh-vm temp]$ hcat -e "desc employees.employees"
emp_no              int                                    
birth_date          string                                 
first_name          string                                 
last_name           string                                 
gender              string                                 
hire_date           string                                 


-- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d 
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name;

Run SQL in Hive:

Connecting to jdbc:hive2://cdh-vm.dbaglobe.com:10000/employees
Connected to: Apache Hive (version 1.1.0-cdh5.14.0)
Driver: Hive JDBC (version 1.1.0-cdh5.14.0)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.0 by Apache Hive
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> -- Find out their manager name & department size
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
. . . . . . . . . . . . . . . . . . . . . . .> from employees.departments d 
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_manager dm on d.dept_no = dm.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees m on dm.emp_no = m.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_emp de on d.dept_no = de.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees e on de.emp_no = e.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> where de.to_date >'2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> and dm.to_date > '2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> group by d.dept_name, concat(m.first_name,' ',m.last_name)
. . . . . . . . . . . . . . . . . . . . . . .> order by d.dept_name;

+---------------------+--------------------+------------+--+
|     d.dept_name     |      manager       | employees  |
+---------------------+--------------------+------------+--+
| Customer Service    | Yuchang Weedman    | 17569      |
| Development         | Leon DasSarma      | 61386      |
| Finance             | Isamu Legleitner   | 12437      |
| Human Resources     | Karsten Sigstam    | 12898      |
| Marketing           | Vishwani Minakawa  | 14842      |
| Production          | Oscar Ghazalie     | 53304      |
| Quality Management  | Dung Pesch         | 14546      |
| Research            | Hilary Kambil      | 15441      |
| Sales               | Hauke Zhang        | 37701      |
+---------------------+--------------------+------------+--+
9 rows selected (100.528 seconds)

Run SQL in Impala:

Connected to cdh-vm.dbaglobe.com:21000
Server version: impalad version 2.11.0-cdh5.14.0 RELEASE (build d68206561bce6b26762d62c01a78e6cd27aa7690)
***********************************************************************************
Welcome to the Impala shell.
(Impala Shell v2.11.0-cdh5.14.0 (d682065) built on Sat Jan  6 13:27:16 PST 2018)

Press TAB twice to see a list of available commands.
***********************************************************************************
[cdh-vm.dbaglobe.com:21000] > -- Find out their manager name & department size
                            > select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
                            > from employees.departments d 
                            > join employees.dept_manager dm on d.dept_no = dm.dept_no
                            > join employees.employees m on dm.emp_no = m.emp_no
                            > join employees.dept_emp de on d.dept_no = de.dept_no
                            > join employees.employees e on de.emp_no = e.emp_no
                            > where de.to_date >'2018-01-01'
                            > and dm.to_date > '2018-01-01'
                            > group by d.dept_name, concat(m.first_name,' ',m.last_name)
                            > order by d.dept_name;
Query: -- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name
Query submitted at: 2018-02-18 20:58:51 (Coordinator: http://cdh-vm.dbaglobe.com:25000)
Query progress can be monitored at: http://cdh-vm.dbaglobe.com:25000/query_plan?query_id=a04e8317637c0e4a:a83017f00000000
+--------------------+-------------------+-----------+
| dept_name          | manager           | employees |
+--------------------+-------------------+-----------+
| Customer Service   | Yuchang Weedman   | 17569     |
| Development        | Leon DasSarma     | 61386     |
| Finance            | Isamu Legleitner  | 12437     |
| Human Resources    | Karsten Sigstam   | 12898     |
| Marketing          | Vishwani Minakawa | 14842     |
| Production         | Oscar Ghazalie    | 53304     |
| Quality Management | Dung Pesch        | 14546     |
| Research           | Hilary Kambil     | 15441     |
| Sales              | Hauke Zhang       | 37701     |
+--------------------+-------------------+-----------+
Fetched 9 row(s) in 19.43s

Run SQL in Mysql:
  
Server version: 5.5.56-MariaDB MariaDB Server

Copyright (c) 2000, 2017, Oracle, MariaDB Corporation Ab and others.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

MariaDB [employees]> -- Find out their manager name & department size
MariaDB [employees]> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
    -> from employees.departments d 
    -> join employees.dept_manager dm on d.dept_no = dm.dept_no
    -> join employees.employees m on dm.emp_no = m.emp_no
    -> join employees.dept_emp de on d.dept_no = de.dept_no
    -> join employees.employees e on de.emp_no = e.emp_no
    -> where de.to_date >'2018-01-01'
    -> and dm.to_date > '2018-01-01'
    -> group by d.dept_name, concat(m.first_name,' ',m.last_name)
    -> order by d.dept_name;
+--------------------+-------------------+-----------+
| dept_name          | manager           | employees |
+--------------------+-------------------+-----------+
| Customer Service   | Yuchang Weedman   |     17569 |
| Development        | Leon DasSarma     |     61386 |
| Finance            | Isamu Legleitner  |     12437 |
| Human Resources    | Karsten Sigstam   |     12898 |
| Marketing          | Vishwani Minakawa |     14842 |
| Production         | Oscar Ghazalie    |     53304 |
| Quality Management | Dung Pesch        |     14546 |
| Research           | Hilary Kambil     |     15441 |
| Sales              | Hauke Zhang       |     37701 |
+--------------------+-------------------+-----------+
9 rows in set (1.62 sec)

Re-write SQL in Pig-Latin:

-- pig script
-- Find out department size and their manager name
d0 = LOAD 'employees.departments' USING org.apache.hive.hcatalog.pig.HCatLoader();
dm0 = LOAD 'employees.dept_manager' USING org.apache.hive.hcatalog.pig.HCatLoader();
de0 = LOAD 'employees.dept_emp' USING org.apache.hive.hcatalog.pig.HCatLoader();
e0 = LOAD 'employees.employees' USING org.apache.hive.hcatalog.pig.HCatLoader();

d1 = FOREACH d0 GENERATE dept_no, dept_name;
dm1 = FOREACH (FILTER dm0 BY to_date >'2018-01-01') GENERATE dept_no, emp_no;
de1 = FOREACH (FILTER de0 by to_date >'2018-01-01') GENERATE dept_no, emp_no;
e1 = FOREACH e0 GENERATE emp_no, CONCAT(first_name,' ',last_name) AS fullname;

d1_dm1 = JOIN d1 BY dept_no, dm1 BY dept_no;
d1_dm1_e1 = JOIN d1_dm1 BY emp_no, e1 BY emp_no;

dept_mgr = FOREACH d1_dm1_e1 GENERATE d1_dm1::d1::dept_no AS dept_no, d1_dm1::d1::dept_name AS dept_name, e1::fullname AS manager;

d1_de1 = JOIN d1 BY dept_no, de1 BY dept_no;
d1_de1_e1 = FOREACH (JOIN d1_de1 BY emp_no, e1 BY emp_no) GENERATE d1_de1::d1::dept_no,e1::emp_no;
dept_emp_count = FOREACH (GROUP d1_de1_e1 BY dept_no) GENERATE group AS dept_no, COUNT(d1_de1_e1) AS employees;

dept_info_0 = JOIN dept_mgr BY dept_no, dept_emp_count BY dept_no;
dept_info_1 = FOREACH dept_info_0 GENERATE dept_mgr::dept_name AS dept_name, dept_mgr::manager AS manager, dept_emp_count::employees AS employees;
dept_info_2 = ORDER dept_info_0 BY dept_name;

DUMP dept_info_2;


[donghua@cdh-vm temp]$ date;pig -4 log4j.properties emp.pig;date;

Sun Feb 18 22:09:44 +08 2018

(d009,Customer Service,Yuchang Weedman,d009,17569)
(d005,Development,Leon DasSarma,d005,61386)
(d002,Finance,Isamu Legleitner,d002,12437)
(d003,Human Resources,Karsten Sigstam,d003,12898)
(d001,Marketing,Vishwani Minakawa,d001,14842)
(d004,Production,Oscar Ghazalie,d004,53304)
(d006,Quality Management,Dung Pesch,d006,14546)
(d008,Research,Hilary Kambil,d008,15441)
(d007,Sales,Hauke Zhang,d007,37701)

Sun Feb 18 22:16:08 +08 2018

Apache Reverse Proxy Example for Cloudera Yarn

$
0
0
[root@cdh-vm conf.d]# cat /etc/httpd/conf.d/reverse.conf 
# Yarn Resource Manager
Listen 192.168.31.14:8088
<VirtualHost 192.168.31.14:8088>
    ProxyPreserveHost On
    ProxyPass / http://cdh-vm.dbaglobe.com:8088/
    ProxyPassReverse / http://cdh-vm.dbaglobe.com:8088/
</VirtualHost>

# Yarn Node Manager
Listen 192.168.31.14:8042
<VirtualHost 192.168.31.14:8042>
    ProxyPreserveHost On
    ProxyPass / http://cdh-vm.dbaglobe.com:8042/
    ProxyPassReverse / http://cdh-vm.dbaglobe.com:8042/
</VirtualHost>

# Yarn JobHistory Server
Listen 192.168.31.14:19888
<VirtualHost 192.168.31.14:19888>
    ProxyPreserveHost On
    ProxyPass / http://cdh-vm.dbaglobe.com:19888/
    ProxyPassReverse / http://cdh-vm.dbaglobe.com:19888/
</VirtualHost>



Example: loading CSV files with newline characters inside field values into Hadoop tables

$
0
0

[donghua@cdh-vm source]$ cat newline.txt 
id,text
1,"a
b"
2,"c"
3,"新年快乐"

[donghua@cdh-vm source]$ cat convert_csv_to_parquet.py
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
df = pd.read_csv('newline.txt')
# Convert from pandas to Arrow
table = pa.Table.from_pandas(df)
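# Note: from_pandas() preserves the pandas index by default, which is why an
# extra __index_level_0__ column shows up in the Parquet file further below;
# pa.Table.from_pandas(df, preserve_index=False) would drop it.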
pq.write_table(table,'newline.parquet')

[donghua@cdh-vm source]$ python convert_csv_to_parquet.py

[donghua@cdh-vm source]$ parquet-tools cat -j newline.parquet
{"id":1,"text":"YQpi","__index_level_0__":0}
{"id":2,"text":"Yw==","__index_level_0__":1}
{"id":3,"text":"5paw5bm05b+r5LmQ","__index_level_0__":2}

[donghua@cdh-vm source]$ hdfs dfs -mkdir tbl_newline_parquet
[donghua@cdh-vm source]$ hdfs dfs -put newline.parquet tbl_newline_parquet/

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> -- Hive syntax
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create external table tbl_newline_3
. . . . . . . . . . . . . . . . . . . . . . .> (id bigint, text string)
. . . . . . . . . . . . . . . . . . . . . . .> stored as parquet
. . . . . . . . . . . . . . . . . . . . . . .> location '/user/donghua/tbl_newline_parquet';
No rows affected (0.114 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from tbl_newline_3;
+-------------------+---------------------+--+
| tbl_newline_3.id  | tbl_newline_3.text  |
+-------------------+---------------------+--+
| 1                 | a
b                 |
| 2                 | c                   |
| 3                 | 新年快乐                |
+-------------------+---------------------+--+
3 rows selected (0.132 seconds)


[cdh-vm.dbaglobe.com:21000] > -- impala syntax
                            > create external table tbl_newline_2
                            > LIKE PARQUET '/user/donghua/tbl_newline_parquet/newline.parquet'
                            > stored as parquet
                            > location '/user/donghua/tbl_newline_parquet';

[cdh-vm.dbaglobe.com:21000] > desc tbl_newline_2;
+-------------------+--------+-----------------------------+
| name              | type   | comment                     |
+-------------------+--------+-----------------------------+
| id                | bigint | Inferred from Parquet file. |
| text              | string | Inferred from Parquet file. |
| __index_level_0__ | bigint | Inferred from Parquet file. |
+-------------------+--------+-----------------------------+
Fetched 3 row(s) in 0.02s

[cdh-vm.dbaglobe.com:21000] > select * from tbl_newline_2;
+----+----------+-------------------+
| id | text     | __index_level_0__ |
+----+----------+-------------------+
| 1  | a        | 0                 |
|    | b        |                   |
| 2  | c        | 1                 |
| 3  | 新年快乐 | 2                 |
+----+----------+-------------------+
Fetched 3 row(s) in 5.25s
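
The extra __index_level_0__ column in the Impala schema is the pandas index, which pyarrow persists by default. If it is not wanted, the conversion script can drop it when building the Arrow table. A small variant of convert_csv_to_parquet.py above (preserve_index is a pyarrow Table.from_pandas option):

# convert_csv_to_parquet_noindex.py -- variant, not from the original post
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.read_csv('newline.txt')
# preserve_index=False keeps only the id and text columns in the Parquet schema
table = pa.Table.from_pandas(df, preserve_index=False)
pq.write_table(table, 'newline_noindex.parquet')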


Hive Beeline default configuration for CDH 5.14

[donghua@cdh-vm scripts]$ beeline -u jdbc:hive2://cdh-vm.dbaglobe.com:10000/test -n donghua << EOD |grep -v hive-exec-core.jar
> ! set headerinterval 10000
> ! set outputformat csv2
> set
> EOD
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
scan complete in 1ms
Connecting to jdbc:hive2://cdh-vm.dbaglobe.com:10000/test
Connected to: Apache Hive (version 1.1.0-cdh5.14.0)
Driver: Hive JDBC (version 1.1.0-cdh5.14.0)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.0 by Apache Hive
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/tes> ! set headerinterval 10000
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/tes> ! set outputformat csv2
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/tes> set
. . . . . . . . . . . . . . . . . . . . . . .> set
_hive.hdfs.session.path=/tmp/hive/donghua/d4b067f0-b697-48ff-8223-3f4b527f090c
_hive.local.session.path=/tmp/hive/d4b067f0-b697-48ff-8223-3f4b527f090c
_hive.tmp_table_space=/tmp/hive/donghua/d4b067f0-b697-48ff-8223-3f4b527f090c/_tmp_space.db
datanucleus.autoCreateSchema=true
datanucleus.autoStartMechanismMode=checked
datanucleus.cache.level2=false
datanucleus.cache.level2.type=none
datanucleus.connectionPoolingType=BONECP
datanucleus.fixedDatastore=false
datanucleus.identifierFactory=datanucleus1
datanucleus.plugin.pluginRegistryBundleCheck=LOG
datanucleus.rdbms.useLegacyNativeValueStrategy=true
datanucleus.storeManagerType=rdbms
datanucleus.transactionIsolation=read-committed
datanucleus.validateColumns=false
datanucleus.validateConstraints=false
datanucleus.validateTables=false
fs.har.impl=org.apache.hadoop.hive.shims.HiveHarFileSystem
fs.scheme.class=dfs
hadoop.bin.path=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/bin/hadoop
hive.analyze.stmt.collect.partlevel.stats=true
hive.archive.enabled=false
hive.auto.convert.join=true
hive.auto.convert.join.noconditionaltask=true
hive.auto.convert.join.noconditionaltask.size=20971520
hive.auto.convert.join.use.nonstaged=false
hive.auto.convert.sortmerge.join=false
hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
hive.auto.convert.sortmerge.join.to.mapjoin=false
hive.auto.progress.timeout=0s
hive.autogen.columnalias.prefix.includefuncname=false
hive.autogen.columnalias.prefix.label=_c
hive.binary.record.max.length=1000
hive.blobstore.optimizations.enabled=true
hive.blobstore.supported.schemes=s3,s3a,s3n
hive.blobstore.use.blobstore.as.scratchdir=false
hive.cache.expr.evaluation=true
hive.cbo.enable=false
hive.cli.errors.ignore=false
hive.cli.pretty.output.num.cols=-1
hive.cli.print.current.db=false
hive.cli.print.header=false
hive.cli.prompt=hive
hive.cluster.delegation.token.store.class=org.apache.hadoop.hive.thrift.MemoryTokenStore
hive.cluster.delegation.token.store.zookeeper.znode=/hivedelegation
hive.compactor.abortedtxn.threshold=1000
hive.compactor.check.interval=300s
hive.compactor.cleaner.run.interval=5000ms
hive.compactor.delta.num.threshold=10
hive.compactor.delta.pct.threshold=0.1
hive.compactor.initiator.on=false
hive.compactor.worker.threads=0
hive.compactor.worker.timeout=86400s
hive.compat=0.12
hive.compute.query.using.stats=false
hive.compute.splits.in.am=true
hive.conf.hidden.list=javax.jdo.option.ConnectionPassword,hive.server2.keystore.password,fs.s3.awsAccessKeyId,fs.s3.awsSecretAccessKey,fs.s3n.awsAccessKeyId,fs.s3n.awsSecretAccessKey,fs.s3a.access.key,fs.s3a.secret.key,fs.s3a.proxy.password,dfs.adls.oauth2.credential,fs.adl.oauth2.credential
hive.conf.restricted.list=hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role,hadoop.bin.path,yarn.bin.path,_hive.local.session.path,_hive.hdfs.session.path,_hive.tmp_table_space,_hive.local.session.path,_hive.hdfs.session.path,_hive.tmp_table_space
hive.conf.validation=true
hive.convert.join.bucket.mapjoin.tez=false
hive.counters.group.name=HIVE
hive.debug.localtask=false
hive.decode.partition.name=false
hive.default.fileformat=TextFile
hive.default.rcfile.serde=org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
hive.default.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
hive.display.partition.cols.separately=true
hive.downloaded.resources.dir=/tmp/${hive.session.id}_resources
hive.enforce.bucketing=false
hive.enforce.bucketmapjoin=false
hive.enforce.sorting=false
hive.enforce.sortmergebucketmapjoin=false
hive.entity.capture.input.URI=true
hive.entity.capture.transform=false
hive.entity.separator=@
hive.error.on.empty.partition=false
hive.exec.check.crossproducts=true
hive.exec.compress.intermediate=false
hive.exec.compress.output=false
hive.exec.concatenate.check.index=true
hive.exec.copyfile.maxsize=33554432
hive.exec.counters.pull.interval=1000
hive.exec.default.partition.name=__HIVE_DEFAULT_PARTITION__
hive.exec.drop.ignorenonexistent=true
hive.exec.dynamic.partition=true
hive.exec.dynamic.partition.mode=strict
hive.exec.infer.bucket.sort=false
hive.exec.infer.bucket.sort.num.buckets.power.two=false
hive.exec.input.listing.max.threads=15
hive.exec.job.debug.capture.stacktraces=true
hive.exec.job.debug.timeout=30000
hive.exec.local.scratchdir=/tmp/hive
hive.exec.max.created.files=100000
hive.exec.max.dynamic.partitions=1000
hive.exec.max.dynamic.partitions.pernode=100
hive.exec.mode.local.auto=false
hive.exec.mode.local.auto.input.files.max=4
hive.exec.mode.local.auto.inputbytes.max=134217728
hive.exec.orc.block.padding.tolerance=0.05
hive.exec.orc.compression.strategy=SPEED
hive.exec.orc.default.block.padding=true
hive.exec.orc.default.block.size=268435456
hive.exec.orc.default.buffer.size=262144
hive.exec.orc.default.compress=ZLIB
hive.exec.orc.default.row.index.stride=10000
hive.exec.orc.default.stripe.size=67108864
hive.exec.orc.dictionary.key.size.threshold=0.8
hive.exec.orc.encoding.strategy=SPEED
hive.exec.orc.memory.pool=0.5
hive.exec.orc.skip.corrupt.data=false
hive.exec.orc.zerocopy=false
hive.exec.parallel=false
hive.exec.parallel.thread.number=8
hive.exec.perf.logger=org.apache.hadoop.hive.ql.log.PerfLogger
hive.exec.rcfile.use.explicit.header=true
hive.exec.rcfile.use.sync.cache=true
hive.exec.reducers.bytes.per.reducer=67108864
hive.exec.reducers.max=1099
hive.exec.rowoffset=false
hive.exec.scratchdir=/tmp/hive
hive.exec.script.allow.partial.consumption=false
hive.exec.script.maxerrsize=100000
hive.exec.script.trust=false
hive.exec.show.job.failure.debug.info=true
hive.exec.stagingdir=.hive-staging
hive.exec.submit.local.task.via.child=true
hive.exec.submitviachild=false
hive.exec.tasklog.debug.timeout=20000
hive.execution.engine=mr
hive.exim.strict.repl.tables=true
hive.exim.uri.scheme.whitelist=hdfs,pfile,s3,s3a,adl
hive.explain.dependency.append.tasktype=false
hive.fetch.output.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
hive.fetch.task.aggr=false
hive.fetch.task.conversion=minimal
hive.fetch.task.conversion.threshold=268435456
hive.file.max.footer=100
hive.fileformat.check=true
hive.groupby.mapaggr.checkinterval=100000
hive.groupby.orderby.position.alias=false
hive.groupby.skewindata=false
hive.hashtable.initialCapacity=100000
hive.hashtable.key.count.adjustment=1.0
hive.hashtable.loadfactor=0.75
hive.hbase.generatehfiles=false
hive.hbase.snapshot.restoredir=/tmp
hive.hbase.wal.enabled=true
hive.heartbeat.interval=1000
hive.hmshandler.force.reload.conf=false
hive.hmshandler.retry.attempts=10
hive.hmshandler.retry.interval=2000ms
hive.hwi.listen.host=0.0.0.0
hive.hwi.listen.port=9999
hive.hwi.war.file=${env:HWI_WAR_FILE}
hive.ignore.mapjoin.hint=true
hive.in.test=false
hive.in.test.remove.logs=true
hive.in.test.short.logs=false
hive.in.tez.test=false
hive.index.compact.binary.search=true
hive.index.compact.file.ignore.hdfs=false
hive.index.compact.query.max.entries=10000000
hive.index.compact.query.max.size=10737418240
hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat
hive.insert.into.external.tables=true
hive.insert.into.multilevel.dirs=false
hive.io.rcfile.column.number.conf=0
hive.io.rcfile.record.buffer.size=4194304
hive.io.rcfile.record.interval=2147483647
hive.io.rcfile.tolerate.corruptions=false
hive.jobname.length=50
hive.join.cache.size=25000
hive.join.emit.interval=1000
hive.lazysimple.extended_boolean_literal=false
hive.limit.optimize.enable=false
hive.limit.optimize.fetch.max=50000
hive.limit.optimize.limit.file=10
hive.limit.pushdown.memory.usage=0.1
hive.limit.query.max.table.partition=-1
hive.limit.row.max.size=100000
hive.load.dynamic.partitions.thread=15
hive.localize.resource.num.wait.attempts=5
hive.localize.resource.wait.interval=5000ms
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager
hive.lock.mapred.only.operation=false
hive.lock.numretries=100
hive.lock.query.string.max.length=1000000
hive.lock.sleep.between.retries=60s
hive.lockmgr.zookeeper.default.partition.name=__HIVE_DEFAULT_ZOOKEEPER_PARTITION__
hive.log.explain.output=false
hive.map.aggr=true
hive.map.aggr.hash.force.flush.memory.threshold=0.9
hive.map.aggr.hash.min.reduction=0.5
hive.map.aggr.hash.percentmemory=0.5
hive.map.groupby.sorted=false
hive.map.groupby.sorted.testmode=false
hive.mapjoin.bucket.cache.size=100
hive.mapjoin.check.memory.rows=100000
hive.mapjoin.followby.gby.localtask.max.memory.usage=0.55
hive.mapjoin.followby.map.aggr.hash.percentmemory=0.3
hive.mapjoin.localtask.max.memory.usage=0.9
hive.mapjoin.optimized.hashtable=true
hive.mapjoin.optimized.hashtable.wbsize=10485760
hive.mapjoin.smalltable.filesize=25000000
hive.mapper.cannot.span.multiple.partitions=false
hive.mapred.local.mem=0
hive.mapred.mode=nonstrict
hive.mapred.partitioner=org.apache.hadoop.hive.ql.io.DefaultHivePartitioner
hive.mapred.reduce.tasks.speculative.execution=true
hive.mapred.supports.subdirectories=false
hive.merge.mapfiles=true
hive.merge.mapredfiles=false
hive.merge.orcfile.stripe.level=true
hive.merge.rcfile.block.level=true
hive.merge.size.per.task=268435456
hive.merge.smallfiles.avgsize=16777216
hive.merge.sparkfiles=true
hive.merge.tezfiles=false
hive.metadata.move.exported.metadata.to.trash=true
hive.metastore.archive.intermediate.archived=_INTERMEDIATE_ARCHIVED
hive.metastore.archive.intermediate.extracted=_INTERMEDIATE_EXTRACTED
hive.metastore.archive.intermediate.original=_INTERMEDIATE_ORIGINAL
hive.metastore.authorization.storage.checks=false
hive.metastore.batch.retrieve.max=300
hive.metastore.batch.retrieve.table.partition.max=1000
hive.metastore.cache.pinobjtypes=Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
hive.metastore.client.connect.retry.delay=1s
hive.metastore.client.socket.timeout=300
hive.metastore.connect.retries=3
hive.metastore.direct.sql.batch.size=0
hive.metastore.disallow.incompatible.col.type.changes=false
hive.metastore.dml.events=false
hive.metastore.event.clean.freq=0s
hive.metastore.event.db.listener.timetolive=86400s
hive.metastore.event.expiry.duration=0s
hive.metastore.execute.setugi=true
hive.metastore.expression.proxy=org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore
hive.metastore.failure.retries=1
hive.metastore.filter.hook=org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl
hive.metastore.fs.handler.class=org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl
hive.metastore.fshandler.threads=15
hive.metastore.initial.metadata.count.enabled=true
hive.metastore.integral.jdo.pushdown=false
hive.metastore.kerberos.principal=hive-metastore/_HOST@EXAMPLE.COM
hive.metastore.limit.partition.request=-1
hive.metastore.metrics.enabled=false
hive.metastore.orm.retrieveMapNullsAsEmptyStrings=false
hive.metastore.rawstore.impl=org.apache.hadoop.hive.metastore.ObjectStore
hive.metastore.sasl.enabled=false
hive.metastore.schema.info.class=org.apache.hadoop.hive.metastore.CDHMetaStoreSchemaInfo
hive.metastore.schema.verification=false
hive.metastore.schema.verification.record.version=true
hive.metastore.server.max.message.size=104857600
hive.metastore.server.max.threads=1000
hive.metastore.server.min.threads=200
hive.metastore.server.tcp.keepalive=true
hive.metastore.thrift.compact.protocol.enabled=false
hive.metastore.thrift.framed.transport.enabled=false
hive.metastore.try.direct.sql=true
hive.metastore.try.direct.sql.ddl=true
hive.metastore.uris=thrift://cdh-vm.dbaglobe.com:9083
hive.metastore.use.SSL=false
hive.metastore.warehouse.dir=/user/hive/warehouse
hive.msck.path.validation=throw
hive.msck.repair.batch.size=0
hive.multi.insert.move.tasks.share.dependencies=false
hive.multigroupby.singlereducer=true
hive.mv.files.thread=15
hive.new.job.grouping.set.cardinality=30
hive.optimize.bucketingsorting=true
hive.optimize.bucketmapjoin=false
hive.optimize.bucketmapjoin.sortedmerge=false
hive.optimize.constant.propagation=true
hive.optimize.correlation=false
hive.optimize.distinct.rewrite=true
hive.optimize.groupby=true
hive.optimize.index.autoupdate=false
hive.optimize.index.filter=true
hive.optimize.index.filter.compact.maxsize=-1
hive.optimize.index.filter.compact.minsize=5368709120
hive.optimize.index.groupby=false
hive.optimize.listbucketing=false
hive.optimize.metadataonly=true
hive.optimize.null.scan=true
hive.optimize.ppd=true
hive.optimize.ppd.storage=true
hive.optimize.reducededuplication=true
hive.optimize.reducededuplication.min.reducer=4
hive.optimize.remove.identity.project=true
hive.optimize.sampling.orderby=false
hive.optimize.sampling.orderby.number=1000
hive.optimize.sampling.orderby.percent=0.1
hive.optimize.skewjoin=false
hive.optimize.skewjoin.compiletime=false
hive.optimize.sort.dynamic.partition=false
hive.optimize.union.remove=false
hive.orc.cache.stripe.details.size=10000
hive.orc.compute.splits.num.threads=10
hive.orc.row.index.stride.dictionary.check=true
hive.orc.splits.include.file.footer=false
hive.outerjoin.supports.filters=true
hive.parquet.timestamp.skip.conversion=true
hive.plan.serialization.format=kryo
hive.ppd.recognizetransivity=true
hive.ppd.remove.duplicatefilters=true
hive.prewarm.enabled=false
hive.prewarm.numcontainers=10
hive.prewarm.spark.timeout=5000ms
hive.query.result.fileformat=TextFile
hive.query.timeout.seconds=0s
hive.querylog.enable.plan.progress=true
hive.querylog.location=/tmp/hive
hive.querylog.plan.progress.interval=60000ms
hive.reorder.nway.joins=true
hive.resultset.use.unique.column.names=true
hive.rework.mapredwork=false
hive.rpc.query.plan=false
hive.sample.seednumber=0
hive.scratch.dir.permission=700
hive.scratchdir.lock=false
hive.script.auto.progress=false
hive.script.operator.env.blacklist=hive.txn.valid.txns,hive.script.operator.env.blacklist
hive.script.operator.id.env.var=HIVE_SCRIPT_OPERATOR_ID
hive.script.operator.truncate.env=false
hive.script.recordreader=org.apache.hadoop.hive.ql.exec.TextRecordReader
hive.script.recordwriter=org.apache.hadoop.hive.ql.exec.TextRecordWriter
hive.script.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
hive.security.authenticator.manager=org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator
hive.security.authorization.enabled=false
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider
hive.security.authorization.sqlstd.confwhitelist=hive\.auto\..*|hive\.cbo\..*|hive\.convert\..*|hive\.exec\.dynamic\.partition.*|hive\.exec\..*\.dynamic\.partitions\..*|hive\.exec\.compress\..*|hive\.exec\.infer\..*|hive\.exec\.mode.local\..*|hive\.exec\.orc\..*|hive\.fetch.task\..*|hive\.hbase\..*|hive\.index\..*|hive\.index\..*|hive\.intermediate\..*|hive\.join\..*|hive\.limit\..*|hive\.mapjoin\..*|hive\.merge\..*|hive\.optimize\..*|hive\.orc\..*|hive\.outerjoin\..*|hive\.ppd\..*|hive\.prewarm\..*|hive\.skewjoin\..*|hive\.smbjoin\..*|hive\.stats\..*|hive\.tez\..*|hive\.vectorized\..*|mapred\.map\..*|mapred\.reduce\..*|mapred\.output\.compression\.codec|mapreduce\.job\.reduce\.slowstart\.completedmaps|mapreduce\.job\.queuename|mapreduce\.input\.fileinputformat\.split\.minsize|mapreduce\.map\..*|mapreduce\.reduce\..*|tez\.am\..*|tez\.task\..*|tez\.runtime\..*|hive\.exec\.reducers\.bytes\.per\.reducer|hive\.client\.stats\.counters|hive\.exec\.default\.partition\.name|hive\.exec\.drop\.ignorenonexistent|hive\.counters\.group\.name|hive\.enforce\.bucketing|hive\.enforce\.bucketmapjoin|hive\.enforce\.sorting|hive\.enforce\.sortmergebucketmapjoin|hive\.cache\.expr\.evaluation|hive\.groupby\.skewindata|hive\.hashtable\.loadfactor|hive\.hashtable\.initialCapacity|hive\.ignore\.mapjoin\.hint|hive\.limit\.row\.max\.size|hive\.mapred\.mode|hive\.map\.aggr|hive\.compute\.query\.using\.stats|hive\.exec\.rowoffset|hive\.variable\.substitute|hive\.variable\.substitute\.depth|hive\.autogen\.columnalias\.prefix\.includefuncname|hive\.autogen\.columnalias\.prefix\.label|hive\.exec\.check\.crossproducts|hive\.compat|hive\.exec\.concatenate\.check\.index|hive\.display\.partition\.cols\.separately|hive\.error\.on\.empty\.partition|hive\.execution\.engine|hive\.exim\.uri\.scheme\.whitelist|hive\.file\.max\.footer|hive\.mapred\.supports\.subdirectories|hive\.insert\.into\.multilevel\.dirs|hive\.localize\.resource\.num\.wait\.attempts|hive\.multi\.insert\.move\.tasks\.share\.dependencies|hive\.support\.quoted\.identifiers|hive\.resultset\.use\.unique\.column\.names|hive\.analyze\.stmt\.collect\.partlevel\.stats|hive\.exec\.job\.debug\.capture\.stacktraces|hive\.exec\.job\.debug\.timeout|hive\.exec\.max\.created\.files|hive\.exec\.reducers\.max|hive\.output\.file\.extension|hive\.exec\.show\.job\.failure\.debug\.info|hive\.exec\.tasklog\.debug\.timeout|hive\.query\.id
hive.security.authorization.task.factory=org.apache.hadoop.hive.ql.parse.authorization.RestrictedHiveAuthorizationTaskFactoryImpl
hive.security.command.whitelist=set,reset,dfs,add,list,delete,reload,compile
hive.security.metastore.authenticator.manager=org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator
hive.security.metastore.authorization.auth.reads=true
hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveMetastoreAuthorizationProvider
hive.serdes.using.metastore.for.schema=org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe,org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe,org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
hive.server.read.socket.timeout=10s
hive.server.tcp.keepalive=true
hive.server2.allow.user.substitution=true
hive.server2.async.exec.async.compile=false
hive.server2.async.exec.keepalive.time=10s
hive.server2.async.exec.shutdown.timeout=10s
hive.server2.async.exec.threads=100
hive.server2.async.exec.wait.queue.size=100
hive.server2.authentication=NONE
hive.server2.authentication.ldap.groupClassKey=groupOfNames
hive.server2.authentication.ldap.groupMembershipKey=member
hive.server2.authentication.ldap.guidKey=uid
hive.server2.clear.dangling.scratchdir=false
hive.server2.clear.dangling.scratchdir.interval=1800s
hive.server2.compile.lock.timeout=0s
hive.server2.enable.doAs=true
hive.server2.global.init.file.location=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2
hive.server2.idle.operation.timeout=21600000
hive.server2.idle.session.check.operation=false
hive.server2.idle.session.timeout=43200000
hive.server2.idle.session.timeout_check_operation=true
hive.server2.logging.operation.enabled=true
hive.server2.logging.operation.level=EXECUTION
hive.server2.logging.operation.log.location=/var/log/hive/operation_logs
hive.server2.long.polling.timeout=5000ms
hive.server2.map.fair.scheduler.queue=true
hive.server2.max.start.attempts=30
hive.server2.metrics.enabled=true
hive.server2.session.check.interval=900000
hive.server2.sleep.interval.between.start.attempts=60s
hive.server2.support.dynamic.service.discovery=false
hive.server2.table.type.mapping=CLASSIC
hive.server2.tez.initialize.default.sessions=false
hive.server2.tez.sessions.per.default.queue=1
hive.server2.thrift.bind.host=cdh-vm.dbaglobe.com
hive.server2.thrift.exponential.backoff.slot.length=100ms
hive.server2.thrift.http.max.idle.time=1800s
hive.server2.thrift.http.max.worker.threads=500
hive.server2.thrift.http.min.worker.threads=5
hive.server2.thrift.http.path=cliservice
hive.server2.thrift.http.port=10001
hive.server2.thrift.http.worker.keepalive.time=60s
hive.server2.thrift.login.timeout=20s
hive.server2.thrift.max.message.size=104857600
hive.server2.thrift.max.worker.threads=100
hive.server2.thrift.min.worker.threads=5
hive.server2.thrift.port=10000
hive.server2.thrift.sasl.qop=auth
hive.server2.thrift.worker.keepalive.time=60s
hive.server2.transport.mode=binary
hive.server2.use.SSL=false
hive.server2.webui.host=0.0.0.0
hive.server2.webui.max.historic.queries=25
hive.server2.webui.max.threads=50
hive.server2.webui.port=10002
hive.server2.webui.spnego.principal=HTTP/_HOST@EXAMPLE.COM
hive.server2.webui.use.spnego=false
hive.server2.webui.use.ssl=false
hive.server2.zookeeper.namespace=hiveserver2
hive.service.metrics.class=org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics
hive.service.metrics.file.frequency=30000
hive.service.metrics.file.location=/var/log/hive/metrics-hiveserver2/metrics.log
hive.service.metrics.reporter=JSON_FILE, JMX
hive.session.history.enabled=false
hive.session.id=d4b067f0-b697-48ff-8223-3f4b527f090c
hive.session.silent=false
hive.skewjoin.key=100000
hive.skewjoin.mapjoin.map.tasks=10000
hive.skewjoin.mapjoin.min.split=33554432
hive.smbjoin.cache.rows=10000
hive.spark.client.connect.timeout=1000ms
hive.spark.client.future.timeout=60s
hive.spark.client.rpc.max.size=52428800
hive.spark.client.rpc.sasl.mechanisms=DIGEST-MD5
hive.spark.client.rpc.threads=8
hive.spark.client.secret.bits=256
hive.spark.client.server.connect.timeout=90000ms
hive.spark.dynamic.partition.pruning=false
hive.spark.dynamic.partition.pruning.map.join.only=false
hive.spark.dynamic.partition.pruning.max.data.size=104857600
hive.spark.job.monitor.timeout=60s
hive.ssl.protocol.blacklist=SSLv2,SSLv3
hive.stageid.rearrange=none
hive.start.cleanup.scratchdir=false
hive.stats.atomic=false
hive.stats.autogather=true
hive.stats.collect.rawdatasize=true
hive.stats.collect.scancols=true
hive.stats.collect.tablekeys=false
hive.stats.dbclass=fs
hive.stats.dbconnectionstring=jdbc:derby:;databaseName=TempStatsStore;create=true
hive.stats.deserialization.factor=1.0
hive.stats.fetch.column.stats=true
hive.stats.fetch.partition.stats=true
hive.stats.gather.num.threads=10
hive.stats.jdbc.timeout=30s
hive.stats.jdbcdriver=org.apache.derby.jdbc.EmbeddedDriver
hive.stats.join.factor=1.1
hive.stats.key.prefix.max.length=150
hive.stats.key.prefix.reserve.length=24
hive.stats.list.num.entries=10
hive.stats.map.num.entries=10
hive.stats.max.variable.length=100
hive.stats.ndv.error=20.0
hive.stats.reliable=false
hive.stats.retries.max=0
hive.stats.retries.wait=3000ms
hive.support.concurrency=true
hive.support.quoted.identifiers=column
hive.test.authz.sstd.hs2.mode=false
hive.test.mode=false
hive.test.mode.prefix=test_
hive.test.mode.samplefreq=32
hive.tez.auto.reducer.parallelism=false
hive.tez.container.size=-1
hive.tez.cpu.vcores=-1
hive.tez.dynamic.partition.pruning=true
hive.tez.dynamic.partition.pruning.max.data.size=104857600
hive.tez.dynamic.partition.pruning.max.event.size=1048576
hive.tez.exec.inplace.progress=true
hive.tez.exec.print.summary=false
hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat
hive.tez.log.level=INFO
hive.tez.max.partition.factor=2.0
hive.tez.min.partition.factor=0.25
hive.tez.smb.number.waves=0.5
hive.transform.escape.input=false
hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager
hive.txn.max.open.batch=1000
hive.txn.timeout=300s
hive.typecheck.on.insert=true
hive.udtf.auto.progress=false
hive.unlock.numretries=10
hive.user.install.directory=hdfs:///user/
hive.variable.substitute=true
hive.variable.substitute.depth=40
hive.vectorized.execution.enabled=true
hive.vectorized.execution.reduce.enabled=false
hive.vectorized.execution.reduce.groupby.enabled=true
hive.vectorized.groupby.checkinterval=4096
hive.vectorized.groupby.flush.percent=0.1
hive.vectorized.groupby.maxentries=1000000
hive.warehouse.subdir.inherit.perms=true
hive.zookeeper.clean.extra.nodes=false
hive.zookeeper.client.port=2181
hive.zookeeper.connection.basesleeptime=1000ms
hive.zookeeper.connection.max.retries=3
hive.zookeeper.namespace=hive_zookeeper_namespace_hive
hive.zookeeper.quorum=cdh-vm.dbaglobe.com
hive.zookeeper.session.timeout=1200000ms
javax.jdo.PersistenceManagerFactoryClass=org.datanucleus.api.jdo.JDOPersistenceManagerFactory
javax.jdo.option.ConnectionDriverName=org.apache.derby.jdbc.EmbeddedDriver
javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=metastore_db;create=true
javax.jdo.option.ConnectionUserName=APP
javax.jdo.option.DetachAllOnCommit=true
javax.jdo.option.Multithreaded=true
javax.jdo.option.NonTransactionalRead=true
mapreduce.input.fileinputformat.input.dir.recursive=false
mapreduce.input.fileinputformat.split.maxsize=256000000
mapreduce.input.fileinputformat.split.minsize=1
mapreduce.input.fileinputformat.split.minsize.per.node=1
mapreduce.input.fileinputformat.split.minsize.per.rack=1
mapreduce.job.committer.setup.cleanup.needed=false
mapreduce.job.committer.task.cleanup.needed=false
mapreduce.job.reduces=-1
mapreduce.reduce.speculative=true
output.formatter=org.apache.hadoop.hive.ql.exec.FetchFormatter$ThriftFormatter
output.protocol=6
parquet.memory.pool.ratio=0.5
rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB=org.apache.hadoop.ipc.ProtobufRpcEngine
silent=off
spark.driver.memory=800000000
spark.dynamicAllocation.enabled=true
spark.dynamicAllocation.initialExecutors=1
spark.dynamicAllocation.maxExecutors=2147483647
spark.dynamicAllocation.minExecutors=1
spark.executor.cores=4
spark.executor.memory=1500000000
spark.master=yarn-cluster
spark.shuffle.service.enabled=true
spark.yarn.driver.memoryOverhead=102
spark.yarn.executor.memoryOverhead=614
startcode=1519649735704
stream.stderr.reporter.enabled=true
stream.stderr.reporter.prefix=reporter:
yarn.bin.path=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/bin/yarn
env:CDH_AVRO_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/avro
env:CDH_CRUNCH_HOME=/usr/lib/crunch
env:CDH_FLUME_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/flume-ng
env:CDH_HADOOP_BIN=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/bin/hadoop
env:CDH_HADOOP_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop
env:CDH_HBASE_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hbase
env:CDH_HBASE_INDEXER_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hbase-solr
env:CDH_HCAT_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hive-hcatalog
env:CDH_HDFS_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-hdfs
env:CDH_HIVE_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hive
env:CDH_HTTPFS_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-httpfs
env:CDH_HUE_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hue
env:CDH_HUE_PLUGINS_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop
env:CDH_IMPALA_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/impala
env:CDH_KAFKA_HOME=/usr/lib/kafka
env:CDH_KMS_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-kms
env:CDH_KUDU_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/kudu
env:CDH_LLAMA_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/llama
env:CDH_MR1_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-0.20-mapreduce
env:CDH_MR2_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-mapreduce
env:CDH_OOZIE_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/oozie
env:CDH_PARQUET_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/parquet
env:CDH_PIG_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/pig
env:CDH_SENTRY_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/sentry
env:CDH_SOLR_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/solr
env:CDH_SPARK_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/spark
env:CDH_SQOOP2_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/sqoop2
env:CDH_SQOOP_HOME=/usr/lib/sqoop
env:CDH_VERSION=5
env:CDH_YARN_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-yarn
env:CDH_ZOOKEEPER_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/zookeeper
env:CGROUP_GROUP_BLKIO=
env:CGROUP_GROUP_CPU=
env:CGROUP_GROUP_CPUACCT=
env:CGROUP_GROUP_MEMORY=
env:CGROUP_ROOT_BLKIO=/sys/fs/cgroup/blkio
env:CGROUP_ROOT_CPU=/sys/fs/cgroup/cpu,cpuacct
env:CGROUP_ROOT_CPUACCT=/sys/fs/cgroup/cpu,cpuacct
env:CGROUP_ROOT_MEMORY=/sys/fs/cgroup/memory
env:CLOUDERA_MYSQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar
env:CLOUDERA_ORACLE_CONNECTOR_JAR=/usr/share/java/oracle-connector-java.jar
env:CLOUDERA_POSTGRESQL_JDBC_JAR=/usr/share/cmf/lib/postgresql-9.0-801.jdbc4.jar
env:CMF_CONF_DIR=/etc/cloudera-scm-agent
env:CMF_PACKAGE_DIR=/usr/lib64/cmf/service
env:CM_ADD_TO_CP_DIRS=navigator/cdh57
env:CM_STATUS_CODES=STATUS_NONE HDFS_DFS_DIR_NOT_EMPTY HBASE_TABLE_DISABLED HBASE_TABLE_ENABLED JOBTRACKER_IN_STANDBY_MODE YARN_RM_IN_STANDBY_MODE
env:CONF_DIR=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2
env:HADOOP_CLIENT_OPTS=-Djava.net.preferIPv4Stack=true -Djava.net.preferIPv4Stack=true -Xms4294967296 -Xmx4294967296 -XX:MaxPermSize=512M -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/hive_hive-HIVESERVER2-f2ddc1ecbff0faafbcffe8ebebc13cb1_pid9057.hprof -XX:OnOutOfMemoryError=/usr/lib64/cmf/service/common/killparent.sh
env:HADOOP_CONF_DIR=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2/yarn-conf
env:HADOOP_HEAPSIZE=256
env:HADOOP_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop
env:HADOOP_HOME_WARN_SUPPRESS=true
env:HADOOP_MAPRED_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop-mapreduce
env:HADOOP_PREFIX=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop
env:HIVE_AUX_JARS_PATH=
env:HIVE_CONF_DIR=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2
env:HIVE_DEFAULT_XML=/etc/hive/conf.dist/hive-default.xml
env:HIVE_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hive
env:HIVE_LOGFILE=hadoop-cmf-hive-HIVESERVER2-cdh-vm.dbaglobe.com.log.out
env:HIVE_LOG_DIR=/var/log/hive
env:HIVE_METASTORE_DATABASE_TYPE=mysql
env:HIVE_ROOT_LOGGER=ERROR,RFA
env:HOME=/var/lib/hive
env:JAVA_HOME=/usr/java/jdk1.8.0_162
env:JSVC_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/bigtop-utils
env:KEYTRUSTEE_KP_HOME=/usr/share/keytrustee-keyprovider
env:KEYTRUSTEE_SERVER_HOME=/usr/lib/keytrustee-server
env:LANG=en_US.UTF-8
env:LD_LIBRARY_PATH=:/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/lib/native
env:MALLOC_ARENA_MAX=4
env:MGMT_HOME=/usr/share/cmf
env:ORACLE_HOME=/usr/share/oracle/instantclient
env:PARCELS_ROOT=/opt/cloudera/parcels
env:PARCEL_DIRNAMES=CDH-5.14.0-1.cdh5.14.0.p0.24
env:PATH=/sbin:/usr/sbin:/bin:/usr/bin
env:PWD=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2
env:SCM_DEFINES_SCRIPTS=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/meta/cdh_env.sh
env:SEARCH_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/search
env:SENTRY_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/sentry
env:SERVICE_LIST=beeline cleardanglingscratchdir cli help hiveburninclient hiveserver2 hiveserver hwi jar lineage metastore metatool orcfiledump rcfilecat schemaTool version 
env:SHELL=/bin/bash
env:SHLVL=2
env:SPARK_CONF_DIR=/etc/spark/conf
env:SPARK_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/spark
env:SPARK_ON_YARN=true
env:SUPERVISOR_ENABLED=1
env:SUPERVISOR_GROUP_NAME=385-hive-HIVESERVER2
env:SUPERVISOR_PROCESS_NAME=385-hive-HIVESERVER2
env:SUPERVISOR_SERVER_URL=unix:///run/cloudera-scm-agent/supervisor/supervisord.sock
env:TOMCAT_HOME=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/bigtop-tomcat
env:WEBHCAT_DEFAULT_XML=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/etc/hive-webhcat/conf.dist/webhcat-default.xml
env:XDG_RUNTIME_DIR=/run/user/0
env:XDG_SESSION_ID=c2
env:YARN_OPTS=-Xmx825955249 -Djava.net.preferIPv4Stack=true 
system:awt.toolkit=sun.awt.X11.XToolkit
system:file.encoding=UTF-8
system:file.encoding.pkg=sun.io
system:file.separator=/
system:hadoop.home.dir=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop
system:hadoop.id.str=
system:hadoop.log.dir=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/logs
system:hadoop.log.file=hadoop.log
system:hadoop.policy.file=hadoop-policy.xml
system:hadoop.root.logger=INFO,console
system:hadoop.security.logger=INFO,NullAppender
system:java.awt.graphicsenv=sun.awt.X11GraphicsEnvironment
system:java.awt.printerjob=sun.print.PSPrinterJob
system:java.class.version=52.0
system:java.endorsed.dirs=/usr/java/jdk1.8.0_162/jre/lib/endorsed
system:java.ext.dirs=/usr/java/jdk1.8.0_162/jre/lib/ext:/usr/java/packages/lib/ext
system:java.home=/usr/java/jdk1.8.0_162/jre
system:java.io.tmpdir=/tmp
system:java.library.path=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hadoop/lib/native
system:java.net.preferIPv4Stack=true
system:java.runtime.name=Java(TM) SE Runtime Environment
system:java.runtime.version=1.8.0_162-b12
system:java.specification.name=Java Platform API Specification
system:java.specification.vendor=Oracle Corporation
system:java.specification.version=1.8
system:java.vendor=Oracle Corporation
system:java.vendor.url=http://java.oracle.com/
system:java.vendor.url.bug=http://bugreport.sun.com/bugreport/
system:java.version=1.8.0_162
system:java.vm.info=mixed mode
system:java.vm.name=Java HotSpot(TM) 64-Bit Server VM
system:java.vm.specification.name=Java Virtual Machine Specification
system:java.vm.specification.vendor=Oracle Corporation
system:java.vm.specification.version=1.8
system:java.vm.vendor=Oracle Corporation
system:java.vm.version=25.162-b12
system:line.separator=

system:os.arch=amd64
system:os.name=Linux
system:os.version=3.10.0-693.17.1.el7.x86_64
system:path.separator=:
system:sun.arch.data.model=64
system:sun.boot.class.path=/usr/java/jdk1.8.0_162/jre/lib/resources.jar:/usr/java/jdk1.8.0_162/jre/lib/rt.jar:/usr/java/jdk1.8.0_162/jre/lib/sunrsasign.jar:/usr/java/jdk1.8.0_162/jre/lib/jsse.jar:/usr/java/jdk1.8.0_162/jre/lib/jce.jar:/usr/java/jdk1.8.0_162/jre/lib/charsets.jar:/usr/java/jdk1.8.0_162/jre/lib/jfr.jar:/usr/java/jdk1.8.0_162/jre/classes
system:sun.boot.library.path=/usr/java/jdk1.8.0_162/jre/lib/amd64
system:sun.cpu.endian=little
system:sun.cpu.isalist=
system:sun.io.unicode.encoding=UnicodeLittle
system:sun.java.launcher=SUN_STANDARD
system:sun.jnu.encoding=UTF-8
system:sun.management.compiler=HotSpot 64-Bit Tiered Compilers
system:sun.os.patch.level=unknown
system:user.country=US
system:user.dir=/run/cloudera-scm-agent/process/385-hive-HIVESERVER2
system:user.home=/var/lib/hive
system:user.language=en
system:user.name=hive
system:user.timezone=Asia/Singapore
682 rows selected (0.166 seconds)
Closing: 0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/test
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/tes> 

[donghua@cdh-vm scripts]$ 
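
Since set dumps everything as key=value pairs (plus the env: and system: prefixes), it is handy to capture the output per cluster and diff it between environments or releases. Below is a Python 3 sketch along those lines, not part of the original session; it reuses the same JDBC URL and user and shells out to the beeline CLI shown above.

# hive_settings_dump.py -- hypothetical helper for comparing Hive configs
import subprocess

def beeline_settings(url, user):
    out = subprocess.run(
        ['beeline', '-u', url, '-n', user, '--outputformat=csv2', '-e', 'set'],
        capture_output=True, text=True, check=True).stdout
    settings = {}
    for line in out.splitlines():
        # keep only key=value lines from the `set` output
        if '=' in line and not line.startswith(('Connecting', 'Connected', 'Driver')):
            key, _, value = line.partition('=')
            settings[key.strip()] = value.strip()
    return settings

if __name__ == '__main__':
    conf = beeline_settings('jdbc:hive2://cdh-vm.dbaglobe.com:10000/test', 'donghua')
    print(conf.get('hive.execution.engine'))   # mr on this CDH 5.14 host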