Configure EM Express for Oracle 12.2
Check the current XDB HTTP/HTTPS port settings, then set the HTTPS port used by EM Express:
SQL> select dbms_xdb_config.getHttpsPort() from dual;
DBMS_XDB_CONFIG.GETHTTPSPORT()
------------------------------
0
SQL> select dbms_xdb.getHttpPort() from dual;
DBMS_XDB.GETHTTPPORT()
----------------------
0
SQL> exec dbms_xdb_config.sethttpsport(5500);
PL/SQL procedure successfully completed.
SQL> select dbms_xdb_config.getHttpsPort() from dual;
DBMS_XDB_CONFIG.GETHTTPSPORT()
------------------------------
5500
SQL> !lsnrctl status |grep -i http
(DESCRIPTION=(ADDRESS=(PROTOCOL=tcps)(HOST=vmxdb01.dbaglobe.com)(PORT=5500))(Security=(my_wallet_directory=/u01/app/oracle/admin/orcl/xdb_wallet))(Presentation=HTTP)(Session=RAW))
SQL> select dbms_xdb.getHttpPort() from dual;
DBMS_XDB.GETHTTPPORT()
----------------------
0
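With the HTTPS port set to 5500 and registered with the listener, EM Express should be reachable in a browser at the usual /em path. The URL below is an assumption based on the host and port shown above:

https://vmxdb01.dbaglobe.com:5500/em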
Use Sqoop to import into Hive tables (source is a view)
Contents of the Sqoop lib directory (/opt/cloudera/parcels/CDH/lib/sqoop/lib/); note that no MySQL JDBC driver is present yet:
total 0
lrwxrwxrwx 1 root root 35 Nov 9 13:49 ant-contrib-1.0b3.jar -> ../../../jars/ant-contrib-1.0b3.jar
lrwxrwxrwx 1 root root 40 Nov 9 13:49 ant-eclipse-1.0-jvm1.2.jar -> ../../../jars/ant-eclipse-1.0-jvm1.2.jar
lrwxrwxrwx 1 root root 41 Nov 9 13:42 avro-mapred-hadoop2.jar -> ../../../lib/avro/avro-mapred-hadoop2.jar
lrwxrwxrwx 1 root root 26 Nov 9 13:42 avro.jar -> ../../../lib/avro/avro.jar
lrwxrwxrwx 1 root root 35 Nov 9 13:49 commons-codec-1.4.jar -> ../../../jars/commons-codec-1.4.jar
lrwxrwxrwx 1 root root 40 Nov 9 13:49 commons-compress-1.4.1.jar -> ../../../jars/commons-compress-1.4.1.jar
lrwxrwxrwx 1 root root 32 Nov 9 13:49 commons-io-1.4.jar -> ../../../jars/commons-io-1.4.jar
lrwxrwxrwx 1 root root 36 Nov 9 13:49 commons-jexl-2.1.1.jar -> ../../../jars/commons-jexl-2.1.1.jar
lrwxrwxrwx 1 root root 35 Nov 9 13:49 commons-lang3-3.4.jar -> ../../../jars/commons-lang3-3.4.jar
lrwxrwxrwx 1 root root 39 Nov 9 13:49 commons-logging-1.1.3.jar -> ../../../jars/commons-logging-1.1.3.jar
lrwxrwxrwx 1 root root 30 Nov 9 13:49 fastutil-6.3.jar -> ../../../jars/fastutil-6.3.jar
lrwxrwxrwx 1 root root 33 Nov 9 13:49 hsqldb-1.8.0.10.jar -> ../../../jars/hsqldb-1.8.0.10.jar
lrwxrwxrwx 1 root root 43 Nov 9 13:49 jackson-annotations-2.3.1.jar -> ../../../jars/jackson-annotations-2.3.1.jar
lrwxrwxrwx 1 root root 36 Nov 9 13:49 jackson-core-2.3.1.jar -> ../../../jars/jackson-core-2.3.1.jar
lrwxrwxrwx 1 root root 40 Nov 9 13:49 jackson-core-asl-1.8.8.jar -> ../../../jars/jackson-core-asl-1.8.8.jar
lrwxrwxrwx 1 root root 40 Nov 9 13:49 jackson-databind-2.3.1.jar -> ../../../jars/jackson-databind-2.3.1.jar
lrwxrwxrwx 1 root root 42 Nov 9 13:49 jackson-mapper-asl-1.8.8.jar -> ../../../jars/jackson-mapper-asl-1.8.8.jar
lrwxrwxrwx 1 root root 36 Nov 9 13:42 kite-data-core.jar -> ../../../lib/kite/kite-data-core.jar
lrwxrwxrwx 1 root root 36 Nov 9 13:42 kite-data-hive.jar -> ../../../lib/kite/kite-data-hive.jar
lrwxrwxrwx 1 root root 41 Nov 9 13:42 kite-data-mapreduce.jar -> ../../../lib/kite/kite-data-mapreduce.jar
lrwxrwxrwx 1 root root 47 Nov 9 13:42 kite-hadoop-compatibility.jar -> ../../../lib/kite/kite-hadoop-compatibility.jar
lrwxrwxrwx 1 root root 29 Nov 9 13:49 opencsv-2.3.jar -> ../../../jars/opencsv-2.3.jar
lrwxrwxrwx 1 root root 31 Nov 9 13:49 paranamer-2.3.jar -> ../../../jars/paranamer-2.3.jar
lrwxrwxrwx 1 root root 37 Nov 9 13:42 parquet-avro.jar -> ../../../lib/parquet/parquet-avro.jar
lrwxrwxrwx 1 root root 39 Nov 9 13:42 parquet-column.jar -> ../../../lib/parquet/parquet-column.jar
lrwxrwxrwx 1 root root 39 Nov 9 13:42 parquet-common.jar -> ../../../lib/parquet/parquet-common.jar
lrwxrwxrwx 1 root root 41 Nov 9 13:42 parquet-encoding.jar -> ../../../lib/parquet/parquet-encoding.jar
lrwxrwxrwx 1 root root 39 Nov 9 13:42 parquet-format.jar -> ../../../lib/parquet/parquet-format.jar
lrwxrwxrwx 1 root root 39 Nov 9 13:42 parquet-hadoop.jar -> ../../../lib/parquet/parquet-hadoop.jar
lrwxrwxrwx 1 root root 40 Nov 9 13:42 parquet-jackson.jar -> ../../../lib/parquet/parquet-jackson.jar
lrwxrwxrwx 1 root root 33 Nov 9 13:49 slf4j-api-1.7.5.jar -> ../../../jars/slf4j-api-1.7.5.jar
lrwxrwxrwx 1 root root 37 Nov 9 13:49 snappy-java-1.0.4.1.jar -> ../../../jars/snappy-java-1.0.4.1.jar
lrwxrwxrwx 1 root root 24 Nov 9 13:49 xz-1.0.jar -> ../../../jars/xz-1.0.jar
[donghua@cdh-vm test_db-master]$
[donghua@cdh-vm test_db-master]$ sudo ln -s /usr/share/java/mysql-connector-java.jar /opt/cloudera/parcels/CDH/lib/sqoop/lib/
[sudo] password for donghua:
[donghua@cdh-vm test_db-master]$ readlink /opt/cloudera/parcels/CDH/lib/sqoop/lib/mysql-connector-java.jar
/usr/share/java/mysql-connector-java.jar
[donghua@cdh-vm test_db-master]$
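Not part of the capture: the symlink above assumes the MySQL JDBC driver is already present at /usr/share/java/mysql-connector-java.jar. On a RHEL/CentOS host it can usually be installed from the OS repository, for example:

sudo yum install -y mysql-connector-java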
MariaDB [(none)]> create user employee_user identified by 'password';
Query OK, 0 rows affected (0.07 sec)
MariaDB [(none)]> grant all on employees.* to employee_user;
Query OK, 0 rows affected (0.04 sec)
MariaDB [(none)]> show grants for employee_user;
+--------------------------------------------------------------------------------------------------------------+
| Grants for employee_user@% |
+--------------------------------------------------------------------------------------------------------------+
| GRANT USAGE ON *.* TO 'employee_user'@'%' IDENTIFIED BY PASSWORD '*2470C0C06DEE42FD1618BB99005ADCA2EC9D1E19' |
| GRANT ALL PRIVILEGES ON `employees`.* TO 'employee_user'@'%' |
+--------------------------------------------------------------------------------------------------------------+
2 rows in set (0.00 sec)
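A quick way to confirm the new account can reach the source database from the Sqoop host (not in the original capture; sqoop eval runs an ad-hoc query and prints the result):

sqoop eval --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P \
  --query "SELECT COUNT(*) FROM current_dept_emp"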
[donghua@cdh-vm test_db-master]$ sqoop list-databases --connect jdbc:mysql://cdh-vm.dbaglobe.com --username employee_user --password password
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:32:56 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:32:56 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:32:56 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
information_schema
employees
[donghua@cdh-vm test_db-master]$ sqoop list-tables --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:33:17 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:33:17 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:33:17 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
current_dept_emp
departments
dept_emp
dept_emp_latest_date
dept_manager
employees
salaries
titles
[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:37:48 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:37:48 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:37:49 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:37:49 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:37:49 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:37:49 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:37:49 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/f0cac41ee0eb9df573aa4341b36a671d/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:37:51 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/f0cac41ee0eb9df573aa4341b36a671d/current_dept_emp.jar
18/01/26 23:37:51 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:37:51 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:37:51 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:37:51 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:37:51 ERROR tool.ImportTool: Import failed: No primary key could be found for table current_dept_emp. Please specify one with --split-by or perform a sequential import with '-m 1'.
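As the error states, the source (a view) has no primary key, so Sqoop cannot pick a split column on its own. The retry below uses '-m 1' (a single mapper); the alternative, used for the Hive import later in this section, is to name a split column explicitly, for example:

sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P --table current_dept_emp --split-by emp_no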
[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp -m 1
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:38:08 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:38:08 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:38:08 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:38:08 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:38:09 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:38:09 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:38:09 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/3cb418ffe5487ad8ed8b36689ec598f4/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:38:10 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/3cb418ffe5487ad8ed8b36689ec598f4/current_dept_emp.jar
18/01/26 23:38:11 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:38:11 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:38:11 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:38:11 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:38:11 INFO mapreduce.ImportJobBase: Beginning import of current_dept_emp
18/01/26 23:38:11 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/26 23:38:12 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/26 23:38:12 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/26 23:38:17 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/26 23:38:18 INFO mapreduce.JobSubmitter: number of splits:1
18/01/26 23:38:18 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0001
18/01/26 23:38:19 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0001
18/01/26 23:38:19 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0001/
18/01/26 23:38:19 INFO mapreduce.Job: Running job: job_1517023991003_0001
18/01/26 23:38:30 INFO mapreduce.Job: Job job_1517023991003_0001 running in uber mode : false
18/01/26 23:38:30 INFO mapreduce.Job: map 0% reduce 0%
18/01/26 23:38:42 INFO mapreduce.Job: map 100% reduce 0%
18/01/26 23:38:43 INFO mapreduce.Job: Job job_1517023991003_0001 completed successfully
18/01/26 23:38:43 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=173876
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=10110817
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=8922
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=8922
Total vcore-milliseconds taken by all map tasks=8922
Total megabyte-milliseconds taken by all map tasks=13704192
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=109
CPU time spent (ms)=3330
Physical memory (bytes) snapshot=281448448
Virtual memory (bytes) snapshot=2788491264
Total committed heap usage (bytes)=246939648
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=10110817
18/01/26 23:38:43 INFO mapreduce.ImportJobBase: Transferred 9.6424 MB in 31.2284 seconds (316.1811 KB/sec)
18/01/26 23:38:43 INFO mapreduce.ImportJobBase: Retrieved 300024 records.
[donghua@cdh-vm test_db-master]$ hdfs dfs -ls
Found 3 items
drwx------ - donghua supergroup 0 2018-01-26 23:38 .staging
drwxr-xr-x - donghua supergroup 0 2018-01-26 23:38 current_dept_emp
-rw-r--r-- 1 donghua supergroup 15 2018-01-20 04:41 test.csv
[donghua@cdh-vm test_db-master]$
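To peek at the imported data (not in the capture; the directory name comes from the listing above, and part-m-00000 is the usual output file name for a single-mapper import):

hdfs dfs -cat current_dept_emp/part-m-00000 | head -5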
0: jdbc:hive2://localhost:10000/default> create database employees;
INFO : Compiling command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165): create database employees
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165); Time taken: 0.043 seconds
INFO : Executing command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165): create database employees
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hive_20180126234646_4c4d2716-9d75-4786-8c31-1ee517688165); Time taken: 0.182 seconds
INFO : OK
No rows affected (0.351 seconds)
0: jdbc:hive2://localhost:10000/default> !sh hdfs dfs -ls /user/hive/warehouse/
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Found 1 items
drwxrwxrwt - anonymous hive 0 2018-01-26 23:46 /user/hive/warehouse/employees.db
[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp --split-by=emp_no --hive-import --create-hive-table --hive-table=employees.current_dept_emp --warehouse-dir=/user/hive/warehouse
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/26 23:56:32 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/26 23:56:32 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/26 23:56:32 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/26 23:56:32 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/26 23:56:32 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/26 23:56:32 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/26 23:56:32 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/26 23:56:32 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/26 23:56:32 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/26 23:56:32 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/26 23:56:32 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/26 23:56:32 INFO tool.CodeGenTool: Beginning code generation
18/01/26 23:56:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:56:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:56:33 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/35ced35e8590fbbd798fa058e0584fed/current_dept_emp.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/26 23:56:35 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/35ced35e8590fbbd798fa058e0584fed/current_dept_emp.jar
18/01/26 23:56:35 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/26 23:56:35 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/26 23:56:35 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/26 23:56:35 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/26 23:56:35 INFO mapreduce.ImportJobBase: Beginning import of current_dept_emp
18/01/26 23:56:35 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/26 23:56:36 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/26 23:56:36 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/26 23:56:41 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/26 23:56:41 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`emp_no`), MAX(`emp_no`) FROM `current_dept_emp`
18/01/26 23:56:41 INFO db.IntegerSplitter: Split size: 122499; Num splits: 4 from: 10001 to: 499999
18/01/26 23:56:42 INFO mapreduce.JobSubmitter: number of splits:4
18/01/26 23:56:42 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0003
18/01/26 23:56:42 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0003
18/01/26 23:56:42 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0003/
18/01/26 23:56:42 INFO mapreduce.Job: Running job: job_1517023991003_0003
18/01/26 23:56:50 INFO mapreduce.Job: Job job_1517023991003_0003 running in uber mode : false
18/01/26 23:56:50 INFO mapreduce.Job: map 0% reduce 0%
18/01/26 23:56:58 INFO mapreduce.Job: map 25% reduce 0%
18/01/26 23:57:03 INFO mapreduce.Job: map 50% reduce 0%
18/01/26 23:57:08 INFO mapreduce.Job: map 75% reduce 0%
18/01/26 23:57:13 INFO mapreduce.Job: map 100% reduce 0%
18/01/26 23:57:14 INFO mapreduce.Job: Job job_1517023991003_0003 completed successfully
18/01/26 23:57:14 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=698232
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=464
HDFS: Number of bytes written=10110817
HDFS: Number of read operations=16
HDFS: Number of large read operations=0
HDFS: Number of write operations=8
Job Counters
Launched map tasks=4
Other local map tasks=4
Total time spent by all maps in occupied slots (ms)=17721
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=17721
Total vcore-milliseconds taken by all map tasks=17721
Total megabyte-milliseconds taken by all map tasks=27219456
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=464
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=313
CPU time spent (ms)=8810
Physical memory (bytes) snapshot=927260672
Virtual memory (bytes) snapshot=11156475904
Total committed heap usage (bytes)=836239360
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=10110817
18/01/26 23:57:14 INFO mapreduce.ImportJobBase: Transferred 9.6424 MB in 38.4431 seconds (256.8429 KB/sec)
18/01/26 23:57:14 INFO mapreduce.ImportJobBase: Retrieved 300024 records.
18/01/26 23:57:14 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp` AS t LIMIT 1
18/01/26 23:57:15 WARN hive.TableDefWriter: Column from_date had to be cast to a less precise type in Hive
18/01/26 23:57:15 WARN hive.TableDefWriter: Column to_date had to be cast to a less precise type in Hive
18/01/26 23:57:15 INFO hive.HiveImport: Loading uploaded data into Hive
Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 3.967 seconds
Loading data to table employees.current_dept_emp
Table employees.current_dept_emp stats: [numFiles=4, totalSize=10110817]
OK
Time taken: 0.85 seconds
[donghua@cdh-vm test_db-master]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp --split-by=emp_no --hive-import --create-hive-table --hive-table=employees.current_dept_emp2 --target-dir=/user/donghua/current_dept_emp2
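(The output of this second import was not captured; it is essentially the same as the previous run, and the resulting table current_dept_emp2 appears in the Hive listing below.)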
0: jdbc:hive2://localhost:10000/default> use employees;
INFO : Compiling command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e): use employees
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e); Time taken: 0.007 seconds
INFO : Executing command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e): use employees
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hive_20180127000909_679b9dfa-5161-467c-9620-8081c6686c8e); Time taken: 0.021 seconds
INFO : OK
No rows affected (0.048 seconds)
0: jdbc:hive2://localhost:10000/default> !tables
+------------+--------------+--------------------+-------------+-------------------------------------------+--+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS |
+------------+--------------+--------------------+-------------+-------------------------------------------+--+
| | employees | current_dept_emp | TABLE | Imported by sqoop on 2018/01/26 23:57:15 |
| | employees | current_dept_emp2 | TABLE | Imported by sqoop on 2018/01/27 00:01:05 |
+------------+--------------+--------------------+-------------+-------------------------------------------+--+
0: jdbc:hive2://localhost:10000/default> !set maxcolumnwidth 200
0: jdbc:hive2://localhost:10000/default> show create table employees.current_dept_emp;
INFO : Compiling command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590): show create table employees.current_dept_emp
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590); Time taken: 0.03 seconds
INFO : Executing command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590): show create table employees.current_dept_emp
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hive_20180127005252_fe156650-eacf-492d-8860-17af7d4fc590); Time taken: 0.009 seconds
INFO : OK
+----------------------------------------------------------------------------------------+--+
| createtab_stmt |
+----------------------------------------------------------------------------------------+--+
| CREATE TABLE `employees.current_dept_emp`( |
| `emp_no` int, |
| `dept_no` string, |
| `from_date` string, |
| `to_date` string) |
| COMMENT 'Imported by sqoop on 2018/01/26 23:57:15' |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| WITH SERDEPROPERTIES ( |
| 'field.delim'='\u0001', |
| 'line.delim'='\n', |
| 'serialization.format'='\u0001') |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/employees.db/current_dept_emp' |
| TBLPROPERTIES ( |
| 'COLUMN_STATS_ACCURATE'='true', |
| 'numFiles'='4', |
| 'totalSize'='10110817', |
| 'transient_lastDdlTime'='1517029041') |
+----------------------------------------------------------------------------------------+--+
23 rows selected (0.087 seconds)
0: jdbc:hive2://localhost:10000/default> show create table employees.current_dept_emp2;
INFO : Compiling command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78): show create table employees.current_dept_emp2
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78); Time taken: 0.027 seconds
INFO : Executing command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78): show create table employees.current_dept_emp2
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hive_20180127005252_e90f722a-ffd4-400d-ae8b-aa76c382dc78); Time taken: 0.013 seconds
INFO : OK
+-----------------------------------------------------------------------------------------+--+
| createtab_stmt |
+-----------------------------------------------------------------------------------------+--+
| CREATE TABLE `employees.current_dept_emp2`( |
| `emp_no` int, |
| `dept_no` string, |
| `from_date` string, |
| `to_date` string) |
| COMMENT 'Imported by sqoop on 2018/01/27 00:01:05' |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| WITH SERDEPROPERTIES ( |
| 'field.delim'='\u0001', |
| 'line.delim'='\n', |
| 'serialization.format'='\u0001') |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/employees.db/current_dept_emp2' |
| TBLPROPERTIES ( |
| 'COLUMN_STATS_ACCURATE'='true', |
| 'numFiles'='4', |
| 'totalSize'='10110817', |
| 'transient_lastDdlTime'='1517029269') |
+-----------------------------------------------------------------------------------------+--+
23 rows selected (0.079 seconds)
0: jdbc:hive2://localhost:10000/default>
[root@cdh-vm ~]# hdfs dfs -ls /user//hive/warehouse
Found 2 items
drwxrwxrwt - donghua hive 0 2018-01-27 00:01 /user/hive/warehouse/employees.db
drwxrwxrwt - donghua hive 0 2018-01-27 00:38 /user/hive/warehouse/test.db
[hdfs@cdh-vm ~]$ hdfs dfs -ls /user//hive/warehouse/employees.db
Found 2 items
drwxrwxrwt - donghua hive 0 2018-01-26 23:57 /user/hive/warehouse/employees.db/current_dept_emp
drwxrwxrwt - donghua hive 0 2018-01-27 00:01 /user/hive/warehouse/employees.db/current_dept_emp2
0: jdbc:hive2://localhost:10000/default> select count(*) from employees.current_dept_emp;
INFO : Compiling command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a): select count(*) from employees.current_dept_emp
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c0, type:bigint, comment:null)], properties:null)
INFO : Completed compiling command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a); Time taken: 0.065 seconds
INFO : Executing command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a): select count(*) from employees.current_dept_emp
INFO : Query ID = hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a
INFO : Total jobs = 1
INFO : Launching Job 1 out of 1
INFO : Starting task [Stage-1:MAPRED] in serial mode
INFO : Number of reduce tasks determined at compile time: 1
INFO : In order to change the average load for a reducer (in bytes):
INFO : set hive.exec.reducers.bytes.per.reducer=
INFO : In order to limit the maximum number of reducers:
INFO : set hive.exec.reducers.max=
INFO : In order to set a constant number of reducers:
INFO : set mapreduce.job.reduces=
INFO : number of splits:1
INFO : Submitting tokens for job: job_1517023991003_0007
INFO : The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0007/
INFO : Starting Job = job_1517023991003_0007, Tracking URL = http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0007/
INFO : Kill Command = /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/lib/hadoop/bin/hadoop job -kill job_1517023991003_0007
INFO : Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
INFO : 2018-01-27 00:37:50,690 Stage-1 map = 0%, reduce = 0%
INFO : 2018-01-27 00:37:58,188 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.83 sec
INFO : 2018-01-27 00:38:05,606 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 3.79 sec
INFO : MapReduce Total cumulative CPU time: 3 seconds 790 msec
INFO : Ended Job = job_1517023991003_0007
INFO : MapReduce Jobs Launched:
INFO : Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 3.79 sec HDFS Read: 10118840 HDFS Write: 7 SUCCESS
INFO : Total MapReduce CPU Time Spent: 3 seconds 790 msec
INFO : Completed executing command(queryId=hive_20180127003737_611e22dd-873d-48f6-9888-a414f9b5cf0a); Time taken: 23.26 seconds
INFO : OK
+---------+--+
| _c0 |
+---------+--+
| 300024 |
+---------+--+
1 row selected (23.371 seconds)
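The Hive count (300024) matches the record count Sqoop reported for the import, confirming the view was loaded in full.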
Use Sqoop to perform incremental import (--check-column=id --incremental=append --last-value=)
[donghua@cdh-vm ~]$ mysql -u employee_user -ppassword -D employees
MariaDB [employees]> create table t1 (id int primary key, c1 varchar(10));
MariaDB [employees]> insert into t1 values(1,'a'),(2,'b');
MariaDB [employees]> select * from t1;
+----+------+
| id | c1 |
+----+------+
| 1 | a |
| 2 | b |
+----+------+
2 rows in set (0.00 sec)
[donghua@cdh-vm ~]$ beeline -u jdbc:hive2://localhost:10000/default -n donghua --silent=true
0: jdbc:hive2://localhost:10000/default> create table employees.t1(id int, c1 varchar(10));
[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
> --username employee_user --password password --table t1 \
> --split-by=id --hive-import --hive-table=employees.t1 \
> --check-column=id --incremental=append --last-value=0
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 03:54:38 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 03:54:38 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 03:54:38 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 03:54:38 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 03:54:38 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 03:54:38 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 03:54:38 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 03:54:38 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 03:54:38 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 03:54:38 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 03:54:38 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 03:54:38 INFO tool.CodeGenTool: Beginning code generation
18/01/27 03:54:38 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:54:38 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:54:38 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/1941b9efeafd888916e872561fa71b1d/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 03:54:40 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/1941b9efeafd888916e872561fa71b1d/t1.jar
18/01/27 03:54:41 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 03:54:41 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 03:54:41 INFO tool.ImportTool: Lower bound value: 0
18/01/27 03:54:41 INFO tool.ImportTool: Upper bound value: 2
18/01/27 03:54:41 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 03:54:41 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 03:54:41 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 03:54:41 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 03:54:41 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 03:54:41 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 03:54:41 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 03:54:41 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 03:54:46 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 03:54:46 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 0 AND `id` <= 2 )
18/01/27 03:54:46 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 1 to: 2
18/01/27 03:54:46 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 03:54:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0009
18/01/27 03:54:47 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0009
18/01/27 03:54:47 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0009/
18/01/27 03:54:47 INFO mapreduce.Job: Running job: job_1517023991003_0009
18/01/27 03:54:54 INFO mapreduce.Job: Job job_1517023991003_0009 running in uber mode : false
18/01/27 03:54:54 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 03:55:02 INFO mapreduce.Job: map 50% reduce 0%
18/01/27 03:55:06 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 03:55:07 INFO mapreduce.Job: Job job_1517023991003_0009 completed successfully
18/01/27 03:55:07 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=350308
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=7843
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=7843
Total vcore-milliseconds taken by all map tasks=7843
Total megabyte-milliseconds taken by all map tasks=12046848
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=110
CPU time spent (ms)=1970
Physical memory (bytes) snapshot=413765632
Virtual memory (bytes) snapshot=5572857856
Total committed heap usage (bytes)=402653184
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=8
18/01/27 03:55:07 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 26.2002 seconds (0.3053 bytes/sec)
18/01/27 03:55:07 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 03:55:07 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 03:55:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 03:55:07 INFO hive.HiveImport: Loading uploaded data into Hive
Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 2.037 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=2, totalSize=8]
OK
Time taken: 0.646 seconds
0: jdbc:hive2://localhost:10000/default> select * from employees.t1;
+--------+--------+--+
| t1.id | t1.c1 |
+--------+--------+--+
| 1 | a |
| 2 | b |
+--------+--------+--+
MariaDB [employees]> insert into t1 values(3,'a'),(4,'b');
Query OK, 2 rows affected (0.00 sec)
Records: 2 Duplicates: 0 Warnings: 0
[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table t1 --split-by=id --hive-import --hive-table=employees.t1 --warehouse-dir=/user/hive/warehouse --check-column=id --incremental=append --last-value=2
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:11:31 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 04:11:31 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 04:11:31 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 04:11:31 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 04:11:31 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:11:31 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:11:31 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:11:31 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:11:31 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:11:31 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 04:11:31 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 04:11:31 INFO tool.CodeGenTool: Beginning code generation
18/01/27 04:11:31 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:31 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:31 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/80c2f1f6c1f1b6c4b9fca928aa6353a8/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 04:11:33 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/80c2f1f6c1f1b6c4b9fca928aa6353a8/t1.jar
18/01/27 04:11:34 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 04:11:34 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 04:11:34 INFO tool.ImportTool: Lower bound value: 2
18/01/27 04:11:34 INFO tool.ImportTool: Upper bound value: 4
18/01/27 04:11:34 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 04:11:34 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 04:11:34 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 04:11:34 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 04:11:34 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 04:11:34 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 04:11:34 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 04:11:34 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 04:11:38 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 04:11:38 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 2 AND `id` <= 4 )
18/01/27 04:11:38 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 3 to: 4
18/01/27 04:11:38 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 04:11:38 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0012
18/01/27 04:11:38 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0012
18/01/27 04:11:38 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0012/
18/01/27 04:11:38 INFO mapreduce.Job: Running job: job_1517023991003_0012
18/01/27 04:11:45 INFO mapreduce.Job: Job job_1517023991003_0012 running in uber mode : false
18/01/27 04:11:45 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 04:11:51 INFO mapreduce.Job: map 50% reduce 0%
18/01/27 04:11:57 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 04:11:57 INFO mapreduce.Job: Job job_1517023991003_0012 completed successfully
18/01/27 04:11:57 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=350308
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=7531
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=7531
Total vcore-milliseconds taken by all map tasks=7531
Total megabyte-milliseconds taken by all map tasks=11567616
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=114
CPU time spent (ms)=1800
Physical memory (bytes) snapshot=403120128
Virtual memory (bytes) snapshot=5573816320
Total committed heap usage (bytes)=359137280
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=8
18/01/27 04:11:57 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 23.359 seconds (0.3425 bytes/sec)
18/01/27 04:11:57 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 04:11:57 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 04:11:57 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:11:57 INFO hive.HiveImport: Loading uploaded data into Hive
Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 1.853 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=4, numRows=0, totalSize=16, rawDataSize=0]
OK
Time taken: 0.603 seconds
0: jdbc:hive2://localhost:10000/default> select * from employees.t1;
+--------+--------+--+
| t1.id | t1.c1 |
+--------+--------+--+
| 1 | a |
| 3 | a |
| 2 | b |
| 4 | b |
+--------+--------+--+
[donghua@cdh-vm ~]$ sqoop job --create emp_t1_incr -- import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table t1 --split-by=id --hive-import --hive-table=employees.t1 --warehouse-dir=/user/hive/warehouse --check-column=id --incremental=append --last-value=4
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:21:32 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 04:21:32 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 04:21:32 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
18/01/27 04:21:32 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
18/01/27 04:21:32 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:21:32 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:21:32 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:21:32 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:21:32 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:21:32 WARN tool.BaseSqoopTool: case that you will detect any issues.
[donghua@cdh-vm ~]$ sqoop job --list
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:30:13 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Available jobs:
emp_t1_incr
[donghua@cdh-vm ~]$ sqoop job --show emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:35:40 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password:
Job: emp_t1_incr
Tool: import
Options:
----------------------------
verbose = false
hcatalog.drop.and.create.table = false
incremental.last.value = 4
db.connect.string = jdbc:mysql://cdh-vm.dbaglobe.com/employees
codegen.output.delimiters.escape = 0
codegen.output.delimiters.enclose.required = false
codegen.input.delimiters.field = 0
mainframe.input.dataset.type = p
split.limit = null
hbase.create.table = false
db.require.password = true
hdfs.append.dir = true
db.table = t1
codegen.input.delimiters.escape = 0
accumulo.create.table = false
import.fetch.size = null
codegen.input.delimiters.enclose.required = false
db.username = employee_user
reset.onemapper = false
codegen.output.delimiters.record = 10
import.max.inline.lob.size = 16777216
sqoop.throwOnError = false
hbase.bulk.load.enabled = false
hcatalog.create.table = false
db.clear.staging.table = false
incremental.col = id
codegen.input.delimiters.record = 0
hdfs.warehouse.dir = /user/hive/warehouse
enable.compression = false
hive.overwrite.table = false
hive.import = true
codegen.input.delimiters.enclose = 0
hive.table.name = employees.t1
accumulo.batch.size = 10240000
hive.drop.delims = false
customtool.options.jsonmap = {}
codegen.output.delimiters.enclose = 0
hdfs.delete-target.dir = false
codegen.output.dir = .
codegen.auto.compile.dir = true
relaxed.isolation = false
mapreduce.num.mappers = 4
accumulo.max.latency = 5000
import.direct.split.size = 0
sqlconnection.metadata.transaction.isolation.level = 2
codegen.output.delimiters.field = 1
export.new.update = UpdateOnly
incremental.mode = AppendRows
hdfs.file.format = TextFile
sqoop.oracle.escaping.disabled = true
codegen.compile.dir = /tmp/sqoop-donghua/compile/e7212eb92686a1486fa1cd44a6c9afc7
direct.import = false
temporary.dirRoot = _sqoop
db.split.column = id
hive.fail.table.exists = false
db.batch = false
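Not shown in the capture: two more rows (id 5 and 6) were inserted into MariaDB before executing the saved job, which is why the incremental bounds reported below are 4 and 6. For example:

insert into t1 values(5,'a'),(6,'b');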
[donghua@cdh-vm ~]$ sqoop job --exec emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:38:01 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password:
18/01/27 04:38:06 WARN tool.BaseSqoopTool: It seems that you're doing hive import directly into default
18/01/27 04:38:06 WARN tool.BaseSqoopTool: hive warehouse directory which is not supported. Sqoop is
18/01/27 04:38:06 WARN tool.BaseSqoopTool: firstly importing data into separate directory and then
18/01/27 04:38:06 WARN tool.BaseSqoopTool: inserting data into hive. Please consider removing
18/01/27 04:38:06 WARN tool.BaseSqoopTool: --target-dir or --warehouse-dir into /user/hive/warehouse in
18/01/27 04:38:06 WARN tool.BaseSqoopTool: case that you will detect any issues.
18/01/27 04:38:06 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 04:38:06 INFO tool.CodeGenTool: Beginning code generation
18/01/27 04:38:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:07 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:07 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/e3c397f1a5469f870ba19e95b80a66a2/t1.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 04:38:08 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/e3c397f1a5469f870ba19e95b80a66a2/t1.jar
18/01/27 04:38:09 INFO tool.ImportTool: Maximal id query for free form incremental import: SELECT MAX(`id`) FROM `t1`
18/01/27 04:38:09 INFO tool.ImportTool: Incremental import based on column `id`
18/01/27 04:38:09 INFO tool.ImportTool: Lower bound value: 4
18/01/27 04:38:09 INFO tool.ImportTool: Upper bound value: 6
18/01/27 04:38:09 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 04:38:09 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 04:38:09 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 04:38:09 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 04:38:09 INFO mapreduce.ImportJobBase: Beginning import of t1
18/01/27 04:38:09 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 04:38:09 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 04:38:09 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 04:38:12 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 04:38:12 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `t1` WHERE ( `id` > 4 AND `id` <= 6 )
18/01/27 04:38:12 INFO db.IntegerSplitter: Split size: 0; Num splits: 4 from: 5 to: 6
18/01/27 04:38:12 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 04:38:12 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0013
18/01/27 04:38:12 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0013
18/01/27 04:38:12 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0013/
18/01/27 04:38:12 INFO mapreduce.Job: Running job: job_1517023991003_0013
18/01/27 04:38:20 INFO mapreduce.Job: Job job_1517023991003_0013 running in uber mode : false
18/01/27 04:38:20 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 04:38:26 INFO mapreduce.Job: map 50% reduce 0%
18/01/27 04:38:31 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 04:38:32 INFO mapreduce.Job: Job job_1517023991003_0013 completed successfully
18/01/27 04:38:33 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=351166
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=197
HDFS: Number of bytes written=8
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters
Launched map tasks=2
Other local map tasks=2
Total time spent by all maps in occupied slots (ms)=8071
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=8071
Total vcore-milliseconds taken by all map tasks=8071
Total megabyte-milliseconds taken by all map tasks=12397056
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=197
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=136
CPU time spent (ms)=1820
Physical memory (bytes) snapshot=404738048
Virtual memory (bytes) snapshot=5573140480
Total committed heap usage (bytes)=354942976
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=8
18/01/27 04:38:33 INFO mapreduce.ImportJobBase: Transferred 8 bytes in 23.4364 seconds (0.3413 bytes/sec)
18/01/27 04:38:33 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 04:38:33 INFO util.AppendUtils: Creating missing output directory - t1
18/01/27 04:38:33 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t1` AS t LIMIT 1
18/01/27 04:38:33 INFO hive.HiveImport: Loading uploaded data into Hive
Logging initialized using configuration in jar:file:/opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/jars/hive-common-1.1.0-cdh5.13.1.jar!/hive-log4j.properties
OK
Time taken: 1.95 seconds
Loading data to table employees.t1
Table employees.t1 stats: [numFiles=6, numRows=0, totalSize=24, rawDataSize=0]
OK
Time taken: 0.664 seconds
[donghua@cdh-vm ~]$ sqoop job --show emp_t1_incr
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 04:38:50 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
Enter password:
Job: emp_t1_incr
Tool: import
Options:
----------------------------
verbose = false
hcatalog.drop.and.create.table = false
incremental.last.value = 6
db.connect.string = jdbc:mysql://cdh-vm.dbaglobe.com/employees
codegen.output.delimiters.escape = 0
codegen.output.delimiters.enclose.required = false
codegen.input.delimiters.field = 0
mainframe.input.dataset.type = p
split.limit = null
hbase.create.table = false
db.require.password = true
hdfs.append.dir = true
db.table = t1
codegen.input.delimiters.escape = 0
accumulo.create.table = false
import.fetch.size = null
codegen.input.delimiters.enclose.required = false
db.username = employee_user
reset.onemapper = false
codegen.output.delimiters.record = 10
import.max.inline.lob.size = 16777216
sqoop.throwOnError = false
hbase.bulk.load.enabled = false
hcatalog.create.table = false
db.clear.staging.table = false
incremental.col = id
codegen.input.delimiters.record = 0
hdfs.warehouse.dir = /user/hive/warehouse
enable.compression = false
hive.overwrite.table = false
hive.import = true
codegen.input.delimiters.enclose = 0
hive.table.name = employees.t1
accumulo.batch.size = 10240000
hive.drop.delims = false
customtool.options.jsonmap = {}
codegen.output.delimiters.enclose = 0
hdfs.delete-target.dir = false
codegen.output.dir = .
codegen.auto.compile.dir = true
relaxed.isolation = false
mapreduce.num.mappers = 4
accumulo.max.latency = 5000
import.direct.split.size = 0
sqlconnection.metadata.transaction.isolation.level = 2
codegen.output.delimiters.field = 1
export.new.update = UpdateOnly
incremental.mode = AppendRows
hdfs.file.format = TextFile
sqoop.oracle.escaping.disabled = true
codegen.compile.dir = /tmp/sqoop-donghua/compile/39496d079794ae53a008a2da9cd2ac4a
direct.import = false
temporary.dirRoot = _sqoop
db.split.column = id
hive.fail.table.exists = false
db.batch = false
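Note that after the run the stored incremental.last.value has advanced automatically from 4 to 6, so the next --exec will only pick up rows with id > 6.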
Use Sqoop to perform incremental data loading (--incremental=lastmodified for a timestamp column)
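The capture omits the creation of the source table t2 and its first import into /user/donghua/t2. A minimal sketch of what must have run beforehand (column names taken from the output below; the exact DDL and import options are assumptions):

-- MariaDB side
create table t2 (id int primary key, last_updated_at timestamp);
insert into t2 values(1,current_timestamp());
insert into t2 values(2,current_timestamp());

# initial load into HDFS
sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P --table t2 -m 1 --target-dir=/user/donghua/t2

After that first import, the target directory contains two rows: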
1,2018-01-27 04:50:07.0
2,2018-01-27 04:50:18.0
[donghua@cdh-vm ~]$
MariaDB [employees]> insert into t2 values(3,current_timestamp());
Query OK, 1 row affected (0.01 sec)
MariaDB [employees]> update t2 set last_updated_at=current_timestamp() where id=2;
Query OK, 1 row affected (0.07 sec)
Rows matched: 1 Changed: 1 Warnings: 0
MariaDB [employees]> select * from t2;
+----+---------------------+
| id | last_updated_at |
+----+---------------------+
| 1 | 2018-01-27 04:50:07 |
| 2 | 2018-01-27 05:10:14 |
| 3 | 2018-01-27 05:09:45 |
+----+---------------------+
3 rows in set (0.00 sec)
MariaDB [employees]>
[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table t2 -m 1 --target-dir=/user/donghua/t2 --check-column=last_updated_at --incremental=lastmodified --last-value='2018-01-27 05:06:03.0'
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:11:59 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:11:59 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:12:00 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:12:00 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:12:00 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:00 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:00 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/631be22fe0124698ede97beba0c8288e/t2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:12:01 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/631be22fe0124698ede97beba0c8288e/t2.jar
18/01/27 05:12:02 ERROR tool.ImportTool: Import failed: --merge-key or --append is required when using --incremental lastmodified and the output directory exists.
[donghua@cdh-vm ~]$ sqoop import --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table t2 -m 1 --target-dir=/user/donghua/t2 --check-column=last_updated_at --incremental=lastmodified --last-value='2018-01-27 05:06:03.0' --merge-key=id
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:12:40 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:12:40 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:12:40 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:12:40 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:12:41 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:41 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:41 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/2e4f90897221b505b822c323c3cb2b41/t2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:12:42 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/2e4f90897221b505b822c323c3cb2b41/t2.jar
18/01/27 05:12:43 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `t2` AS t LIMIT 1
18/01/27 05:12:43 INFO tool.ImportTool: Incremental import based on column `last_updated_at`
18/01/27 05:12:43 INFO tool.ImportTool: Lower bound value: '2018-01-27 05:06:03.0'
18/01/27 05:12:43 INFO tool.ImportTool: Upper bound value: '2018-01-27 05:12:43.0'
18/01/27 05:12:43 WARN manager.MySQLManager: It looks like you are importing from mysql.
18/01/27 05:12:43 WARN manager.MySQLManager: This transfer can be faster! Use the --direct
18/01/27 05:12:43 WARN manager.MySQLManager: option to exercise a MySQL-specific fast path.
18/01/27 05:12:43 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
18/01/27 05:12:43 INFO mapreduce.ImportJobBase: Beginning import of t2
18/01/27 05:12:43 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 05:12:43 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 05:12:43 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:12:47 INFO db.DBInputFormat: Using read commited transaction isolation
18/01/27 05:12:47 INFO mapreduce.JobSubmitter: number of splits:1
18/01/27 05:12:48 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0016
18/01/27 05:12:48 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0016
18/01/27 05:12:48 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0016/
18/01/27 05:12:48 INFO mapreduce.Job: Running job: job_1517023991003_0016
18/01/27 05:12:55 INFO mapreduce.Job: Job job_1517023991003_0016 running in uber mode : false
18/01/27 05:12:55 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 05:13:01 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 05:13:02 INFO mapreduce.Job: Job job_1517023991003_0016 completed successfully
18/01/27 05:13:02 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=175177
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=87
HDFS: Number of bytes written=48
HDFS: Number of read operations=4
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Other local map tasks=1
Total time spent by all maps in occupied slots (ms)=4073
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=4073
Total vcore-milliseconds taken by all map tasks=4073
Total megabyte-milliseconds taken by all map tasks=6256128
Map-Reduce Framework
Map input records=2
Map output records=2
Input split bytes=87
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=61
CPU time spent (ms)=920
Physical memory (bytes) snapshot=196935680
Virtual memory (bytes) snapshot=2785828864
Total committed heap usage (bytes)=155713536
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=48
18/01/27 05:13:02 INFO mapreduce.ImportJobBase: Transferred 48 bytes in 19.1241 seconds (2.5099 bytes/sec)
18/01/27 05:13:02 INFO mapreduce.ImportJobBase: Retrieved 2 records.
18/01/27 05:13:02 INFO tool.ImportTool: Final destination exists, will run merge job.
18/01/27 05:13:02 INFO Configuration.deprecation: mapred.output.key.class is deprecated. Instead, use mapreduce.job.output.key.class
18/01/27 05:13:02 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:13:07 INFO input.FileInputFormat: Total input paths to process : 2
18/01/27 05:13:07 INFO mapreduce.JobSubmitter: number of splits:2
18/01/27 05:13:07 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0017
18/01/27 05:13:08 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0017
18/01/27 05:13:08 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0017/
18/01/27 05:13:08 INFO mapreduce.Job: Running job: job_1517023991003_0017
18/01/27 05:13:15 INFO mapreduce.Job: Job job_1517023991003_0017 running in uber mode : false
18/01/27 05:13:15 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 05:13:20 INFO mapreduce.Job: map 50% reduce 0%
18/01/27 05:13:24 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 05:13:31 INFO mapreduce.Job: map 100% reduce 100%
18/01/27 05:13:31 INFO mapreduce.Job: Job job_1517023991003_0017 completed successfully
18/01/27 05:13:31 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=90
FILE: Number of bytes written=526653
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=386
HDFS: Number of bytes written=72
HDFS: Number of read operations=9
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=2
Launched reduce tasks=1
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=6496
Total time spent by all reduces in occupied slots (ms)=3317
Total time spent by all map tasks (ms)=6496
Total time spent by all reduce tasks (ms)=3317
Total vcore-milliseconds taken by all map tasks=6496
Total vcore-milliseconds taken by all reduce tasks=3317
Total megabyte-milliseconds taken by all map tasks=9977856
Total megabyte-milliseconds taken by all reduce tasks=5094912
Map-Reduce Framework
Map input records=4
Map output records=4
Map output bytes=96
Map output materialized bytes=122
Input split bytes=290
Combine input records=0
Combine output records=0
Reduce input groups=3
Reduce shuffle bytes=122
Reduce input records=4
Reduce output records=3
Spilled Records=8
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=211
CPU time spent (ms)=1900
Physical memory (bytes) snapshot=1147371520
Virtual memory (bytes) snapshot=8375828480
Total committed heap usage (bytes)=1154482176
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=96
File Output Format Counters
Bytes Written=72
18/01/27 05:13:31 INFO tool.ImportTool: Incremental import complete! To run another incremental import of all data following this import, supply the following arguments:
18/01/27 05:13:31 INFO tool.ImportTool: --incremental lastmodified
18/01/27 05:13:31 INFO tool.ImportTool: --check-column last_updated_at
18/01/27 05:13:31 INFO tool.ImportTool: --last-value 2018-01-27 05:12:43.0
18/01/27 05:13:31 INFO tool.ImportTool: (Consider saving this with 'sqoop job --create')
[donghua@cdh-vm ~]$ hdfs dfs -ls /user/donghua/t2/
Found 2 items
-rw-r--r-- 1 donghua supergroup 0 2018-01-27 05:13 /user/donghua/t2/_SUCCESS
-rw-r--r-- 1 donghua supergroup 72 2018-01-27 05:13 /user/donghua/t2/part-r-00000
[donghua@cdh-vm ~]$ hdfs dfs -cat /user/donghua/t2/part-r-00000
1,2018-01-27 04:50:07.0
2,2018-01-27 05:10:14.0
3,2018-01-27 05:09:45.0
[donghua@cdh-vm ~]$
Use Sqoop export to move data from HDFS into MySQL
MariaDB [employees]> create table current_dept_emp2 as select * from current_dept_emp where 1=2;
Query OK, 0 rows affected (0.05 sec)
Records: 0 Duplicates: 0 Warnings: 0
[donghua@cdh-vm ~]$ sqoop export --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees --username employee_user --password password --table current_dept_emp2 --export-dir /user/donghua/current_dept_emp
Warning: /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/bin/../lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
18/01/27 05:43:54 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.1
18/01/27 05:43:54 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
18/01/27 05:43:55 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
18/01/27 05:43:55 INFO tool.CodeGenTool: Beginning code generation
18/01/27 05:43:55 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp2` AS t LIMIT 1
18/01/27 05:43:55 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `current_dept_emp2` AS t LIMIT 1
18/01/27 05:43:55 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
Note: /tmp/sqoop-donghua/compile/4eb832477301808137f8d255765ba2ca/current_dept_emp2.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
18/01/27 05:43:56 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-donghua/compile/4eb832477301808137f8d255765ba2ca/current_dept_emp2.jar
18/01/27 05:43:56 INFO mapreduce.ExportJobBase: Beginning export of current_dept_emp2
18/01/27 05:43:57 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
18/01/27 05:43:58 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
18/01/27 05:43:58 INFO client.RMProxy: Connecting to ResourceManager at cdh-vm.dbaglobe.com/192.168.56.10:8032
18/01/27 05:44:00 INFO input.FileInputFormat: Total input paths to process : 1
18/01/27 05:44:00 INFO input.FileInputFormat: Total input paths to process : 1
18/01/27 05:44:00 INFO mapreduce.JobSubmitter: number of splits:4
18/01/27 05:44:01 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1517023991003_0018
18/01/27 05:44:01 INFO impl.YarnClientImpl: Submitted application application_1517023991003_0018
18/01/27 05:44:01 INFO mapreduce.Job: The url to track the job: http://cdh-vm.dbaglobe.com:8088/proxy/application_1517023991003_0018/
18/01/27 05:44:01 INFO mapreduce.Job: Running job: job_1517023991003_0018
18/01/27 05:44:08 INFO mapreduce.Job: Job job_1517023991003_0018 running in uber mode : false
18/01/27 05:44:08 INFO mapreduce.Job: map 0% reduce 0%
18/01/27 05:44:16 INFO mapreduce.Job: map 25% reduce 0%
18/01/27 05:44:22 INFO mapreduce.Job: map 50% reduce 0%
18/01/27 05:44:28 INFO mapreduce.Job: map 75% reduce 0%
18/01/27 05:44:34 INFO mapreduce.Job: map 100% reduce 0%
18/01/27 05:44:35 INFO mapreduce.Job: Job job_1517023991003_0018 completed successfully
18/01/27 05:44:35 INFO mapreduce.Job: Counters: 30
File System Counters
FILE: Number of bytes read=0
FILE: Number of bytes written=695328
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=10241715
HDFS: Number of bytes written=0
HDFS: Number of read operations=19
HDFS: Number of large read operations=0
HDFS: Number of write operations=0
Job Counters
Launched map tasks=4
Data-local map tasks=4
Total time spent by all maps in occupied slots (ms)=20479
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=20479
Total vcore-milliseconds taken by all map tasks=20479
Total megabyte-milliseconds taken by all map tasks=31455744
Map-Reduce Framework
Map input records=300024
Map output records=300024
Input split bytes=711
Spilled Records=0
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=332
CPU time spent (ms)=15020
Physical memory (bytes) snapshot=1057984512
Virtual memory (bytes) snapshot=11192446976
Total committed heap usage (bytes)=862453760
File Input Format Counters
Bytes Read=0
File Output Format Counters
Bytes Written=0
18/01/27 05:44:35 INFO mapreduce.ExportJobBase: Transferred 9.7673 MB in 37.4601 seconds (266.9952 KB/sec)
18/01/27 05:44:35 INFO mapreduce.ExportJobBase: Exported 300024 records.
MariaDB [employees]> select count(*) from current_dept_emp2;
+----------+
| count(*) |
+----------+
| 300024 |
+----------+
1 row in set (0.09 sec)
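Re-running the same export against a table that already contains the rows would insert duplicates. A hedged variant using Sqoop's update/upsert mode is sketched below; the key columns emp_no,dept_no are an assumption based on the current_dept_emp layout, and with the MySQL connector allowinsert typically translates to INSERT ... ON DUPLICATE KEY UPDATE, so the target table needs a matching primary or unique key (the CTAS-created current_dept_emp2 above has none).
# upsert instead of plain insert: update rows matching the key, insert the rest
sqoop export --connect jdbc:mysql://cdh-vm.dbaglobe.com/employees \
  --username employee_user -P \
  --table current_dept_emp2 \
  --export-dir /user/donghua/current_dept_emp \
  --update-key emp_no,dept_no --update-mode allowinsert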
Convert Excel into CSV using pandas
warning: setlocale: LC_CTYPE: cannot change locale (UTF-8)
Last login: Fri Feb 2 07:19:47 2018 from 192.168.31.177
-bash: warning: setlocale: LC_CTYPE: cannot change locale (UTF-8): No such file or directory
[donghua@localhost ~]$ vi /etc/environment
# Add the following 2 lines
LANG=en_US.utf-8
LC_ALL=en_US.utf-8
Donghuas-MacBook-Air:~ donghua$ ssh 192.168.31.5
Last login: Fri Feb 2 07:25:15 2018 from 192.168.31.177
[donghua@localhost ~]$
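With the locale fixed, the conversion this section is titled after is only a few lines of pandas. A minimal sketch, assuming placeholder file names input.xlsx/output.csv and that an Excel engine such as xlrd or openpyxl is installed:
# convert the first worksheet of an Excel file to CSV
import pandas as pd

df = pd.read_excel('input.xlsx')                         # first sheet by default; pass sheet_name=... for others
df.to_csv('output.csv', index=False, encoding='utf-8')   # drop the pandas index, keep UTF-8 data intact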
Use Python script to manage Cloudera CDH services
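No script is reproduced here, but as a hedged sketch of the idea using the cm_api client (pip install cm-api; the host name, admin/admin credentials and service name 'hive' are placeholders to adapt):
# list clusters/services and optionally restart one via the Cloudera Manager Python API
from cm_api.api_client import ApiResource

api = ApiResource('cdh-vm.dbaglobe.com', username='admin', password='admin')
for cluster in api.get_all_clusters():
    for service in cluster.get_all_services():
        print("%s %s %s %s" % (cluster.name, service.name, service.serviceState, service.healthSummary))
    # cluster.get_service('hive').restart().wait()   # restart one service; returns an ApiCommand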
Change timezone on Redhat EL7
Local time: Sat 2018-02-03 18:51:25 EST
Universal time: Sat 2018-02-03 23:51:25 UTC
RTC time: Sat 2018-02-03 23:51:22
Time zone: America/New_York (EST, -0500)
NTP enabled: yes
NTP synchronized: yes
RTC in local TZ: no
DST active: no
Last DST change: DST ended at
Sun 2017-11-05 01:59:59 EDT
Sun 2017-11-05 01:00:00 EST
Next DST change: DST begins (the clock jumps one hour forward) at
Sun 2018-03-11 01:59:59 EST
Sun 2018-03-11 03:00:00 EDT
[root@cdh-vm ~]# timedatectl list-timezones |grep -i singapore
Asia/Singapore
[root@cdh-vm ~]# timedatectl set-timezone Asia/Singapore
[root@cdh-vm ~]# timedatectl
Local time: Sun 2018-02-04 07:52:04 +08
Universal time: Sat 2018-02-03 23:52:04 UTC
RTC time: Sat 2018-02-03 23:52:01
Time zone: Asia/Singapore (+08, +0800)
NTP enabled: yes
NTP synchronized: yes
RTC in local TZ: no
DST active: n/a
Fix "you are accessing a non-optimized Hue" message with Proxy/LB setup
You are accessing a non-optimized Hue, please switch to one of the available addresses: http://cdh-vm.dbaglobe.com:8889
How to fix:
Hue -> Configuration -> Hue Service Advanced Configuration Snippet (Safety Valve) for hue_safety_valve.ini
[desktop]
hue_load_balancer=http://192.168.31.5:8889,http://cdh-vm.dbaglobe.com:8889
warning: "set mapreduce.framework.name=local" terminates hiveserver2 server process
No rows affected (0.015 seconds)
0: jdbc:hive2://cdh-vm:10000/employees> select id,count(*) num from t1 group by id order by num;
Unknown HS2 problem when communicating with Thrift server.
Error: org.apache.thrift.transport.TTransportException: java.net.SocketException: Broken pipe (Write failed) (state=08S01,code=0)
# /var/run/cloudera-scm-agent/process/175-hive-HIVESERVER2/logs/stderr.log
Job running in-process (local Hadoop)
+ ps -p 898 -c
+ grep java
+ RET=0
+ '[' 0 -eq 0 ']'
+ TARGET=898
++ date
+ echo Tue Feb 6 22:19:34 +08 2018
+ kill -9 898
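The stderr trace above shows the wrapper script under the HiveServer2 process directory issuing kill -9 against pid 898 once the query ran in-process ("local Hadoop"), which is what takes the server down. A less disruptive alternative for small queries is Hive's auto local mode, sketched below; hive.exec.mode.local.auto is a standard Hive setting, but confirm it is permitted by your HiveServer2 configuration before relying on it.
-- in beeline: let Hive decide per query whether local execution is safe
set hive.exec.mode.local.auto=true;
select id,count(*) num from t1 group by id order by num;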
SQL Server JDBC integrated authentication error
com.microsoft.sqlserver.jdbc.SQLServerException: This driver is not configured for integrated authentication. ClientConnectionId:b31236b3-c830-45c9-bdb0-8e9ecbe01476
at com.microsoft.sqlserver.jdbc.SQLServerConnection.terminate(SQLServerConnection.java:2400)
at com.microsoft.sqlserver.jdbc.AuthenticationJNI.<init>
at com.microsoft.sqlserver.jdbc.SQLServerConnection.logon(SQLServerConnection.java:3132)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.access$100(SQLServerConnection.java:43)
at com.microsoft.sqlserver.jdbc.SQLServerConnection$LogonCommand.doExecute(SQLServerConnection.java:3123)
at com.microsoft.sqlserver.jdbc.TDSCommand.execute(IOBuffer.java:7505)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.executeCommand(SQLServerConnection.java:2445)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.connectHelper(SQLServerConnection.java:1981)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.login(SQLServerConnection.java:1628)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.connectInternal(SQLServerConnection.java:1459)
at com.microsoft.sqlserver.jdbc.SQLServerConnection.connect(SQLServerConnection.java:773)
at com.microsoft.sqlserver.jdbc.SQLServerDriver.connect(SQLServerDriver.java:1168)
at org.talend.core.model.metadata.builder.database.DriverShim.connect(DriverShim.java:41)
at org.talend.core.model.metadata.builder.database.ExtractMetaDataUtils.connect(ExtractMetaDataUtils.java:1069)
at org.talend.core.model.metadata.builder.database.ExtractMetaDataFromDataBase.testConnection(ExtractMetaDataFromDataBase.java:315)
at org.talend.metadata.managment.repository.ManagerConnection.check(ManagerConnection.java:289)
at org.talend.repository.ui.wizards.metadata.connection.database.DatabaseForm$62.runWithCancel(DatabaseForm.java:3983)
at org.talend.repository.ui.wizards.metadata.connection.database.DatabaseForm$62.runWithCancel(DatabaseForm.java:1)
at org.talend.repository.ui.dialog.AProgressMonitorDialogWithCancel$1.runnableWithCancel(AProgressMonitorDialogWithCancel.java:77)
at org.talend.repository.ui.dialog.AProgressMonitorDialogWithCancel$ARunnableWithProgressCancel$1.call(AProgressMonitorDialogWithCancel.java:161)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.UnsatisfiedLinkError: no sqljdbc_auth in java.library.path
at java.lang.ClassLoader.loadLibrary(Unknown Source)
at java.lang.Runtime.loadLibrary0(Unknown Source)
at java.lang.System.loadLibrary(Unknown Source)
at com.microsoft.sqlserver.jdbc.AuthenticationJNI.<clinit>
... 20 more
How to fix:
Copy the sqljdbc_auth.dll that matches the OS architecture (x86 or x64) into C:\Windows or C:\Windows\System32
PS C:\Users\Administrator> copy C:\Donghua\TOD-DI\sqljdbc_6.0\enu\auth\x64\sqljdbc_auth.dll C:\Windows\
PS C:\Users\Administrator> dir C:\Windows\sqljdbc_auth.dll
Directory: C:\Windows
Mode LastWriteTime Length Name
---- ------------- ------ ----
-a---- 1/17/2017 11:44 AM 310088 sqljdbc_auth.dll
How to upgrade outdated Python packages
pycurl: libcurl link-time ssl backend (nss) is different from compile-time ssl backend (none/other)
There was a problem importing one of the Python modules
required to run yum. The error leading to this problem was:
pycurl: libcurl link-time ssl backend (nss) is different from compile-time ssl backend (none/other)
Please install a package which provides this module, or
verify that the module is installed correctly.
It's possible that the above module doesn't match the
current version of Python, which is:
2.7.5 (default, Aug 4 2017, 00:39:18)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-16)]
If you cannot solve this problem yourself, please go to
the yum faq at:
http://yum.baseurl.org/wiki/Faq
How to fix:
Remove the existing pycurl installation:
[root@cdh-vm logs]# pip uninstall pycurl
Export the link-time SSL backend reported in the message (nss above), then reinstall:
[root@cdh-vm logs]# export PYCURL_SSL_LIBRARY=nss
[root@cdh-vm logs]# pip install pycurl
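pip may reuse a previously built wheel and come back with the same backend; a quick hedged check (pycurl.version reports the libcurl and SSL backend the module was linked against):
# force a fresh build, then confirm the linked SSL backend
pip install --no-cache-dir pycurl
python -c "import pycurl; print(pycurl.version)"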
Learning Apache Pig Chapter 2 (O'Reilly)
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/cities_small.txt
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/states.txt
[donghua@cdh-vm temp]$ pig -4 log4j.properties
grunt> cities = load 'cities_small.txt' as (name:chararray,state:chararray,pop:int);
grunt> aliases;
grunt> describe cities
cities: {name: chararray,state: chararray,pop: int}
grunt> \de cities
cities: {name: chararray,state: chararray,pop: int}
grunt> ca_cities = filter cities by (state=='CA');
grunt> dump ca_cities;
grunt> \d ca_cities
grunt> illustrate;
(South Gate,CA,96640)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | South Gate | CA | 96640 |
--------------------------------------------------------------------
grunt> illustrate;
(Fresno,CA,476050)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | Fresno | CA | 476050 |
--------------------------------------------------------------------
grunt> ordered_cities = order cities by pop desc;
grunt> states = load 'states.txt' as (rank:int,code:chararray,fullname:chararray,date_entered:chararray,year_entered:int);
grunt> cities_join_states = join cities by state, states by code;
grunt> illustrate cities_join_states;
(Fargo,ND,93531)
(39,ND,North Dakota,02-NOV,1889)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | Fargo | ND | 93531 |
| | Fargo | ND | 93531 |
--------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------
| states | rank:int | code:chararray | fullname:chararray | date_entered:chararray | year_entered:int |
--------------------------------------------------------------------------------------------------------------------------
| | 39 | ND | North Dakota | 02-NOV | 1889 |
| | 39 | ND | North Dakota | 02-NOV | 1889 |
--------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| cities_join_states | cities::name:chararray | cities::state:chararray | cities::pop:int | states::rank:int | states::code:chararray | states::fullname:chararray | states::date_entered:chararray | states::year_entered:int |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
grunt> cities_join_states_short = foreach cities_join_states generate cities::name, states::fullname;
grunt> store cities_join_states_short into 'cities_join_states_short';
grunt> fs -ls cities_join_states_short
grunt> fs -cat cities_join_states_short/part-r-00000
grunt> cities_join_states_short = foreach (join cities by state, states by code) generate cities::name, states::fullname;
grunt> city_and_state = foreach cities generate name,state,pop*1.5;
grunt> cities_by_state = group cities by state;
grunt> \de cities_by_state;
cities_by_state: {group: chararray,cities: {(name: chararray,state: chararray,pop: int)}}
grunt> illustrate cities_by_state;
(Sioux Falls,SD,154997)
-----------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
-----------------------------------------------------------------------
| | Sioux Falls | SD | 154997 |
| | Rapid City | SD | 65491 |
-----------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------
| cities_by_state | group:chararray | cities:bag{:tuple(name:chararray,state:chararray,pop:int)} |
------------------------------------------------------------------------------------------------------------------------------
| | SD | {(Sioux Falls, SD, 154997), (Rapid City, SD, 65491)} |
------------------------------------------------------------------------------------------------------------------------------
grunt> total_cities = foreach (group cities all) generate COUNT(cities);
grunt> \d total_cities;
(500)
grunt> cities_by_state = foreach (group cities by state) generate group, COUNT(cities);
grunt> \d cities_by_state;
grunt> cities_by_state = foreach (group cities by state parallel 3) generate group, COUNT(cities);
grunt> store cities_by_state into 'cities_by_state';
grunt> fs -ls cities_by_state
Found 4 items
-rw-r--r-- 1 donghua supergroup 0 2018-02-17 22:25 cities_by_state/_SUCCESS
-rw-r--r-- 1 donghua supergroup 113 2018-02-17 22:25 cities_by_state/part-r-00000
-rw-r--r-- 1 donghua supergroup 82 2018-02-17 22:25 cities_by_state/part-r-00001
-rw-r--r-- 1 donghua supergroup 86 2018-02-17 22:25 cities_by_state/part-r-00002
Complex data processing in SQL vs Pig
[donghua@cdh-vm temp]$ hcat -e "desc employees.departments"
dept_no string
dept_name string
[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_manager"
emp_no int
dept_no string
from_date string
to_date string
[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_emp"
emp_no int
dept_no string
from_date string
to_date string
[donghua@cdh-vm temp]$ hcat -e "desc employees.employees"
emp_no int
birth_date string
first_name string
last_name string
gender string
hire_date string
-- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name;
Run SQL in Hive:
Connecting to jdbc:hive2://cdh-vm.dbaglobe.com:10000/employees
Connected to: Apache Hive (version 1.1.0-cdh5.14.0)
Driver: Hive JDBC (version 1.1.0-cdh5.14.0)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.0 by Apache Hive
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> -- Find out their manager name & department size
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
. . . . . . . . . . . . . . . . . . . . . . .> from employees.departments d
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_manager dm on d.dept_no = dm.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees m on dm.emp_no = m.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_emp de on d.dept_no = de.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees e on de.emp_no = e.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> where de.to_date >'2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> and dm.to_date > '2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> group by d.dept_name, concat(m.first_name,' ',m.last_name)
. . . . . . . . . . . . . . . . . . . . . . .> order by d.dept_name;
+---------------------+--------------------+------------+--+
| d.dept_name | manager | employees |
+---------------------+--------------------+------------+--+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+---------------------+--------------------+------------+--+
9 rows selected (100.528 seconds)
Run SQL in Impala:
Connected to cdh-vm.dbaglobe.com:21000
Server version: impalad version 2.11.0-cdh5.14.0 RELEASE (build d68206561bce6b26762d62c01a78e6cd27aa7690)
***********************************************************************************
Welcome to the Impala shell.
(Impala Shell v2.11.0-cdh5.14.0 (d682065) built on Sat Jan 6 13:27:16 PST 2018)
Press TAB twice to see a list of available commands.
***********************************************************************************
[cdh-vm.dbaglobe.com:21000] > -- Find out their manager name & department size
> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
> from employees.departments d
> join employees.dept_manager dm on d.dept_no = dm.dept_no
> join employees.employees m on dm.emp_no = m.emp_no
> join employees.dept_emp de on d.dept_no = de.dept_no
> join employees.employees e on de.emp_no = e.emp_no
> where de.to_date >'2018-01-01'
> and dm.to_date > '2018-01-01'
> group by d.dept_name, concat(m.first_name,' ',m.last_name)
> order by d.dept_name;
Query: -- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name
Query submitted at: 2018-02-18 20:58:51 (Coordinator: http://cdh-vm.dbaglobe.com:25000)
Query progress can be monitored at: http://cdh-vm.dbaglobe.com:25000/query_plan?query_id=a04e8317637c0e4a:a83017f00000000
+--------------------+-------------------+-----------+
| dept_name | manager | employees |
+--------------------+-------------------+-----------+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+--------------------+-------------------+-----------+
Fetched 9 row(s) in 19.43s
Run SQL in MySQL:
Server version: 5.5.56-MariaDB MariaDB Server
Copyright (c) 2000, 2017, Oracle, MariaDB Corporation Ab and others.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
MariaDB [employees]> -- Find out their manager name & department size
MariaDB [employees]> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
-> from employees.departments d
-> join employees.dept_manager dm on d.dept_no = dm.dept_no
-> join employees.employees m on dm.emp_no = m.emp_no
-> join employees.dept_emp de on d.dept_no = de.dept_no
-> join employees.employees e on de.emp_no = e.emp_no
-> where de.to_date >'2018-01-01'
-> and dm.to_date > '2018-01-01'
-> group by d.dept_name, concat(m.first_name,' ',m.last_name)
-> order by d.dept_name;
+--------------------+-------------------+-----------+
| dept_name | manager | employees |
+--------------------+-------------------+-----------+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+--------------------+-------------------+-----------+
9 rows in set (1.62 sec)
Rewrite the SQL in Pig Latin:
-- pig script
-- Find out department size and their manager name
d0 = LOAD 'employees.departments' USING org.apache.hive.hcatalog.pig.HCatLoader();
dm0 = LOAD 'employees.dept_manager' USING org.apache.hive.hcatalog.pig.HCatLoader();
de0 = LOAD 'employees.dept_emp' USING org.apache.hive.hcatalog.pig.HCatLoader();
e0 = LOAD 'employees.employees' USING org.apache.hive.hcatalog.pig.HCatLoader();
d1 = FOREACH d0 GENERATE dept_no, dept_name;
dm1 = FOREACH (FILTER dm0 BY to_date >'2018-01-01') GENERATE dept_no, emp_no;
de1 = FOREACH (FILTER de0 by to_date >'2018-01-01') GENERATE dept_no, emp_no;
e1 = FOREACH e0 GENERATE emp_no, CONCAT(first_name,' ',last_name) AS fullname;
d1_dm1 = JOIN d1 BY dept_no, dm1 BY dept_no;
d1_dm1_e1 = JOIN d1_dm1 BY emp_no, e1 BY emp_no;
dept_mgr = FOREACH d1_dm1_e1 GENERATE d1_dm1::d1::dept_no AS dept_no, d1_dm1::d1::dept_name AS dept_name, e1::fullname AS manager;
d1_de1 = JOIN d1 BY dept_no, de1 BY dept_no;
d1_de1_e1 = FOREACH (JOIN d1_de1 BY emp_no, e1 BY emp_no) GENERATE d1_de1::d1::dept_no,e1::emp_no;
dept_emp_count = FOREACH (GROUP d1_de1_e1 BY dept_no) GENERATE group AS dept_no, COUNT(d1_de1_e1) AS employees;
dept_info_0 = JOIN dept_mgr BY dept_no, dept_emp_count BY dept_no;
dept_info_1 = FOREACH dept_info_0 GENERATE dept_mgr::dept_name AS dept_name, dept_mgr::manager AS manager, dept_emp_count::employees AS employees;
dept_info_2 = ORDER dept_info_0 BY dept_name;
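-- note: this orders the un-projected join (dept_info_0), which is why the duplicate
-- dept_no columns appear in the DUMP output below; ORDER dept_info_1 instead to keep
-- only dept_name, manager and employees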
DUMP dept_info_2;
[donghua@cdh-vm temp]$ date;pig -4 log4j.properties emp.pig;date;
Sun Feb 18 22:09:44 +08 2018
(d009,Customer Service,Yuchang Weedman,d009,17569)
(d005,Development,Leon DasSarma,d005,61386)
(d002,Finance,Isamu Legleitner,d002,12437)
(d003,Human Resources,Karsten Sigstam,d003,12898)
(d001,Marketing,Vishwani Minakawa,d001,14842)
(d004,Production,Oscar Ghazalie,d004,53304)
(d006,Quality Management,Dung Pesch,d006,14546)
(d008,Research,Hilary Kambil,d008,15441)
(d007,Sales,Hauke Zhang,d007,37701)
Sun Feb 18 22:16:08 +08 2018
Apache Reverse Proxy Example for Cloudera Yarn
# Yarn Resource Manager
Listen 192.168.31.14:8088
ProxyPreserveHost On
ProxyPass / http://cdh-vm.dbaglobe.com:8088/
ProxyPassReverse / http://cdh-vm.dbaglobe.com:8088/
# Yarn Node Manager
Listen 192.168.31.14:8042
ProxyPreserveHost On
ProxyPass / http://cdh-vm.dbaglobe.com:8042/
ProxyPassReverse / http://cdh-vm.dbaglobe.com:8042/
# Yarn JobHistory Server
Listen 192.168.31.14:19888
ProxyPreserveHost On
ProxyPass / http://cdh-vm.dbaglobe.com:19888/
ProxyPassReverse / http://cdh-vm.dbaglobe.com:19888/
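For the snippet to take effect it has to be picked up by httpd and SELinux must allow outbound proxy connections. A hedged sketch of the activation steps on RHEL/CentOS 7 (the file name yarn-proxy.conf is an assumption):
# drop the directives above into a conf file, allow httpd to proxy, open the ports, reload
cp yarn-proxy.conf /etc/httpd/conf.d/
setsebool -P httpd_can_network_connect 1
firewall-cmd --permanent --add-port=8088/tcp --add-port=8042/tcp --add-port=19888/tcp && firewall-cmd --reload
apachectl configtest && systemctl restart httpd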
Example to load CSV with newline characters within data into Hadoop tables
[donghua@cdh-vm source]$ cat newline.txt
id,text
1,"a
b"
2,"c"
3,"新年快乐"
[donghua@cdh-vm source]$ cat convert_csv_to_parquet.py
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
df = pd.read_csv('newline.txt')
# Convert from pandas to Arrow
table = pa.Table.from_pandas(df)
pq.write_table(table,'newline.parquet')
[donghua@cdh-vm source]$ python convert_csv_to_parquet.py
[donghua@cdh-vm source]$ parquet-tools cat -j newline.parquet
{"id":1,"text":"YQpi","__index_level_0__":0}
{"id":2,"text":"Yw==","__index_level_0__":1}
{"id":3,"text":"5paw5bm05b+r5LmQ","__index_level_0__":2}
[donghua@cdh-vm source]$ hdfs dfs -mkdir tbl_newline_parquet
[donghua@cdh-vm source]$ hdfs dfs -put newline.parquet tbl_newline_parquet/
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> -- Hive syntax
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create external table tbl_newline_3
. . . . . . . . . . . . . . . . . . . . . . .> (id bigint, text string)
. . . . . . . . . . . . . . . . . . . . . . .> stored as parquet
. . . . . . . . . . . . . . . . . . . . . . .> location '/user/donghua/tbl_newline_parquet';
No rows affected (0.114 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from tbl_newline_3;
+-------------------+---------------------+--+
| tbl_newline_3.id | tbl_newline_3.text |
+-------------------+---------------------+--+
| 1 | a
b |
| 2 | c |
| 3 | 新年快乐 |
+-------------------+---------------------+--+
3 rows selected (0.132 seconds)
[cdh-vm.dbaglobe.com:21000] > -- impala syntax
> create external table tbl_newline_2
> LIKE PARQUET '/user/donghua/tbl_newline_parquet/newline.parquet'
> stored as parquet
> location '/user/donghua/tbl_newline_parquet';
[cdh-vm.dbaglobe.com:21000] > desc tbl_newline_2;
+-------------------+--------+-----------------------------+
| name | type | comment |
+-------------------+--------+-----------------------------+
| id | bigint | Inferred from Parquet file. |
| text | string | Inferred from Parquet file. |
| __index_level_0__ | bigint | Inferred from Parquet file. |
+-------------------+--------+-----------------------------+
Fetched 3 row(s) in 0.02s
[cdh-vm.dbaglobe.com:21000] > select * from tbl_newline_2;
+----+----------+-------------------+
| id | text | __index_level_0__ |
+----+----------+-------------------+
| 1 | a | 0 |
| | b | |
| 2 | c | 1 |
| 3 | 新年快乐 | 2 |
+----+----------+-------------------+
Fetched 3 row(s) in 5.25s
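The extra __index_level_0__ column Impala infers comes from the pandas index that pyarrow writes by default. A small hedged variant of the conversion script above drops it at write time (preserve_index is a Table.from_pandas option; verify it against the installed pyarrow version):
# convert without writing the pandas index into the Parquet file
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.read_csv('newline.txt')
table = pa.Table.from_pandas(df, preserve_index=False)   # no __index_level_0__ column
pq.write_table(table, 'newline_noindex.parquet')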