1. Creating the table
The hbase.columns.mapping below has six entries (:key plus five columns in the pinfo family), so the Hive table must declare six columns in the same order; note also that ROW FORMAT DELIMITED does not apply to a STORED BY table, so no row format clause is used.

create table inpatient(
    PATIENT_NO String COMMENT 'admission number (HBase row key)',
    INPATIENT_NO String COMMENT 'inpatient number',
    NAME String COMMENT 'patient name',
    SEX_CODE String COMMENT 'sex code',
    BIRTHDATE TIMESTAMP COMMENT 'date of birth',
    BALANCE_COST String COMMENT 'total cost')
COMMENT 'basic information of inpatients'
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,pinfo:INPATIENT_NO,pinfo:NAME,pinfo:SEX_CODE,pinfo:BIRTHDATE,pinfo:BALANCE_COST")
TBLPROPERTIES ("hbase.table.name" = "inpatient");
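Because this is a managed (non-EXTERNAL) Hive table, the storage handler creates the HBase table named in hbase.table.name, with the pinfo column family, when the DDL runs; if a table of that name already exists in HBase, the statement fails and CREATE EXTERNAL TABLE must be used instead. A quick way to confirm the HBase side is in place (a minimal check, run from $HBASE_HOME/bin like the other commands here):

# Show the schema of the HBase table backing the Hive table
echo "describe 'inpatient'" | ./hbase shell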
2. Importing data into HBase
2.1 Direct import into HBase
./hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
    -Dimporttsv.separator="," \
    -Dimporttsv.columns=HBASE_ROW_KEY,pinfo:INPATIENT_NO,pinfo:NAME,pinfo:SEX_CODE,pinfo:BIRTHDATE,pinfo:BALANCE_COST \
    inpatient /usr/hadoop/inpatient.txt
......
2016-12-22 10:33:36,985 INFO [main] client.RMProxy: Connecting to ResourceManager at hadoop-master/172.16.172.13:8032
2016-12-22 10:33:37,340 INFO [main] Configuration.deprecation: io.bytes.per.checksum is deprecated. Instead, use dfs.bytes-per-checksum
2016-12-22 10:33:43,450 INFO [main] input.FileInputFormat: Total input paths to process : 1
2016-12-22 10:33:44,640 INFO [main] mapreduce.JobSubmitter: number of splits:1
2016-12-22 10:33:44,952 INFO [main] Configuration.deprecation: io.bytes.per.checksum is deprecated. Instead, use dfs.bytes-per-checksum
2016-12-22 10:33:47,173 INFO [main] mapreduce.JobSubmitter: Submitting tokens for job: job_1482371551462_0002
2016-12-22 10:33:50,830 INFO [main] impl.YarnClientImpl: Submitted application application_1482371551462_0002
2016-12-22 10:33:51,337 INFO [main] mapreduce.Job: The url to track the job: http://hadoop-master:8088/proxy/application_1482371551462_0002/
2016-12-22 10:33:51,338 INFO [main] mapreduce.Job: Running job: job_1482371551462_0002
2016-12-22 10:34:39,499 INFO [main] mapreduce.Job: Job job_1482371551462_0002 running in uber mode : false
2016-12-22 10:34:39,572 INFO [main] mapreduce.Job: map 0% reduce 0%
2016-12-22 10:35:48,228 INFO [main] mapreduce.Job: map 1% reduce 0%
2016-12-22 10:36:06,876 INFO [main] mapreduce.Job: map 3% reduce 0%
2016-12-22 10:36:09,981 INFO [main] mapreduce.Job: map 5% reduce 0%
2016-12-22 10:36:13,739 INFO [main] mapreduce.Job: map 7% reduce 0%
2016-12-22 10:36:17,592 INFO [main] mapreduce.Job: map 10% reduce 0%
2016-12-22 10:36:22,891 INFO [main] mapreduce.Job: map 12% reduce 0%
2016-12-22 10:36:45,217 INFO [main] mapreduce.Job: map 17% reduce 0%
2016-12-22 10:37:14,914 INFO [main] mapreduce.Job: map 20% reduce 0%
2016-12-22 10:37:35,739 INFO [main] mapreduce.Job: map 25% reduce 0%
2016-12-22 10:37:39,013 INFO [main] mapreduce.Job: map 34% reduce 0%
2016-12-22 10:38:24,289 INFO [main] mapreduce.Job: map 42% reduce 0%
2016-12-22 10:38:36,644 INFO [main] mapreduce.Job: map 49% reduce 0%
2016-12-22 10:38:57,618 INFO [main] mapreduce.Job: map 54% reduce 0%
2016-12-22 10:39:00,808 INFO [main] mapreduce.Job: map 56% reduce 0%
2016-12-22 10:39:07,879 INFO [main] mapreduce.Job: map 58% reduce 0%
2016-12-22 10:39:11,489 INFO [main] mapreduce.Job: map 60% reduce 0%
2016-12-22 10:39:24,708 INFO [main] mapreduce.Job: map 62% reduce 0%
2016-12-22 10:39:29,188 INFO [main] mapreduce.Job: map 63% reduce 0%
2016-12-22 10:39:34,165 INFO [main] mapreduce.Job: map 65% reduce 0%
2016-12-22 10:40:12,473 INFO [main] mapreduce.Job: map 66% reduce 0%
2016-12-22 10:40:39,471 INFO [main] mapreduce.Job: map 73% reduce 0%
2016-12-22 10:40:40,910 INFO [main] mapreduce.Job: map 74% reduce 0%
2016-12-22 10:40:42,936 INFO [main] mapreduce.Job: map 75% reduce 0%
2016-12-22 10:40:46,471 INFO [main] mapreduce.Job: map 77% reduce 0%
2016-12-22 10:40:50,495 INFO [main] mapreduce.Job: map 79% reduce 0%
2016-12-22 10:40:53,267 INFO [main] mapreduce.Job: map 81% reduce 0%
2016-12-22 10:41:06,843 INFO [main] mapreduce.Job: map 83% reduce 0%
2016-12-22 10:41:13,140 INFO [main] mapreduce.Job: map 92% reduce 0%
2016-12-22 10:41:22,305 INFO [main] mapreduce.Job: map 93% reduce 0%
2016-12-22 10:41:27,671 INFO [main] mapreduce.Job: map 96% reduce 0%
2016-12-22 10:41:48,688 INFO [main] mapreduce.Job: map 100% reduce 0%
2016-12-22 10:43:20,552 INFO [main] mapreduce.Job: Job job_1482371551462_0002 completed successfully
2016-12-22 10:43:28,574 INFO [main] mapreduce.Job: Counters: 31
    File System Counters
        FILE: Number of bytes read=0
        FILE: Number of bytes written=127746
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=43306042
        HDFS: Number of bytes written=0
        HDFS: Number of read operations=2
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=0
    Job Counters
        Launched map tasks=1
        Data-local map tasks=1
        Total time spent by all maps in occupied slots (ms)=460404
        Total time spent by all reduces in occupied slots (ms)=0
        Total time spent by all map tasks (ms)=460404
        Total vcore-seconds taken by all map tasks=460404
        Total megabyte-seconds taken by all map tasks=471453696
    Map-Reduce Framework
        Map input records=115411
        Map output records=115152
        Input split bytes=115
        Spilled Records=0
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=26590
        CPU time spent (ms)=234550
        Physical memory (bytes) snapshot=83329024
        Virtual memory (bytes) snapshot=544129024
        Total committed heap usage (bytes)=29036544
    ImportTsv
        Bad Lines=259
    File Input Format Counters
        Bytes Read=43305927
    File Output Format Counters
        Bytes Written=0
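Note the counter ImportTsv Bad Lines=259: lines that do not split into the six expected fields are skipped and counted, which is why only 115152 of the 115411 input records were written (115411 - 259 = 115152). Skipping is ImportTsv's default; to make the job fail on the first malformed line instead, add the switch below to the ImportTsv options above:

# Fail the job on malformed input instead of silently skipping it
-Dimporttsv.skip.bad.lines=false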
2.2 Importing with completebulkload
# Add the following line to /etc/profile:
# export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$HBASE_HOME/lib/*"
./hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
    -Dimporttsv.separator="," \
    -Dimporttsv.bulk.output=/usr/hadoop/inpatient.tmp \
    -Dimporttsv.columns=HBASE_ROW_KEY,pinfo:INPATIENT_NO,pinfo:NAME,pinfo:SEX_CODE,pinfo:BIRTHDATE,pinfo:BALANCE_COST \
    inpatient /usr/hadoop/inpatient.txt
......
2016-12-22 12:26:04,496 INFO [main] client.RMProxy: Connecting to ResourceManager at hadoop-master/172.16.172.13:8032
2016-12-22 12:26:12,411 INFO [main] input.FileInputFormat: Total input paths to process : 1
2016-12-22 12:26:12,563 INFO [main] mapreduce.JobSubmitter: number of splits:1
2016-12-22 12:26:12,577 INFO [main] Configuration.deprecation: io.bytes.per.checksum is deprecated. Instead, use dfs.bytes-per-checksum
2016-12-22 12:26:13,220 INFO [main] mapreduce.JobSubmitter: Submitting tokens for job: job_1482371551462_0005
2016-12-22 12:26:13,764 INFO [main] impl.YarnClientImpl: Submitted application application_1482371551462_0005
2016-12-22 12:26:13,832 INFO [main] mapreduce.Job: The url to track the job: http://hadoop-master:8088/proxy/application_1482371551462_0005/
2016-12-22 12:26:13,833 INFO [main] mapreduce.Job: Running job: job_1482371551462_0005
2016-12-22 12:26:35,952 INFO [main] mapreduce.Job: Job job_1482371551462_0005 running in uber mode : false
2016-12-22 12:26:36,156 INFO [main] mapreduce.Job: map 0% reduce 0%
2016-12-22 12:27:15,839 INFO [main] mapreduce.Job: map 3% reduce 0%
2016-12-22 12:27:18,868 INFO [main] mapreduce.Job: map 53% reduce 0%
2016-12-22 12:27:21,981 INFO [main] mapreduce.Job: map 58% reduce 0%
2016-12-22 12:27:29,195 INFO [main] mapreduce.Job: map 67% reduce 0%
2016-12-22 12:27:41,582 INFO [main] mapreduce.Job: map 83% reduce 0%
2016-12-22 12:27:52,819 INFO [main] mapreduce.Job: map 85% reduce 0%
2016-12-22 12:27:59,189 INFO [main] mapreduce.Job: map 93% reduce 0%
2016-12-22 12:28:07,498 INFO [main] mapreduce.Job: map 100% reduce 0%
2016-12-22 12:29:11,199 INFO [main] mapreduce.Job: map 100% reduce 67%
2016-12-22 12:29:24,353 INFO [main] mapreduce.Job: map 100% reduce 70%
2016-12-22 12:29:32,324 INFO [main] mapreduce.Job: map 100% reduce 74%
2016-12-22 12:29:37,001 INFO [main] mapreduce.Job: map 100% reduce 79%
2016-12-22 12:29:38,011 INFO [main] mapreduce.Job: map 100% reduce 82%
2016-12-22 12:29:41,038 INFO [main] mapreduce.Job: map 100% reduce 84%
2016-12-22 12:29:45,082 INFO [main] mapreduce.Job: map 100% reduce 88%
2016-12-22 12:29:48,115 INFO [main] mapreduce.Job: map 100% reduce 90%
2016-12-22 12:29:51,154 INFO [main] mapreduce.Job: map 100% reduce 92%
2016-12-22 12:29:54,186 INFO [main] mapreduce.Job: map 100% reduce 94%
2016-12-22 12:29:57,205 INFO [main] mapreduce.Job: map 100% reduce 97%
2016-12-22 12:30:00,236 INFO [main] mapreduce.Job: map 100% reduce 100%
2016-12-22 12:30:06,388 INFO [main] mapreduce.Job: Job job_1482371551462_0005 completed successfully
2016-12-22 12:30:09,203 INFO [main] mapreduce.Job: Counters: 50
    File System Counters
        FILE: Number of bytes read=237707880
        FILE: Number of bytes written=357751428
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=43306042
        HDFS: Number of bytes written=195749237
        HDFS: Number of read operations=8
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=3
    Job Counters
        Launched map tasks=1
        Launched reduce tasks=1
        Data-local map tasks=1
        Total time spent by all maps in occupied slots (ms)=99691
        Total time spent by all reduces in occupied slots (ms)=83330
        Total time spent by all map tasks (ms)=99691
        Total time spent by all reduce tasks (ms)=83330
        Total vcore-seconds taken by all map tasks=99691
        Total vcore-seconds taken by all reduce tasks=83330
        Total megabyte-seconds taken by all map tasks=102083584
        Total megabyte-seconds taken by all reduce tasks=85329920
    Map-Reduce Framework
        Map input records=115411
        Map output records=115152
        Map output bytes=118397787
        Map output materialized bytes=118853937
        Input split bytes=115
        Combine input records=115152
        Combine output records=115077
        Reduce input groups=115077
        Reduce shuffle bytes=118853937
        Reduce input records=115077
        Reduce output records=3337137
        Spilled Records=345231
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=2017
        CPU time spent (ms)=38130
        Physical memory (bytes) snapshot=383750144
        Virtual memory (bytes) snapshot=1184014336
        Total committed heap usage (bytes)=231235584
    ImportTsv
        Bad Lines=259
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=43305927
    File Output Format Counters
        Bytes Written=195749237

This job only writes HFiles to /usr/hadoop/inpatient.tmp; a second step is needed to move them into the table:

./hadoop jar /home/hadoop/Deploy/hbase-1.1.2/lib/hbase-server-1.1.2.jar completebulkload /usr/hadoop/inpatient.tmp inpatient
......
16/12/22 12:42:04 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=localhost:2181 sessionTimeout=90000 watcher=hconnection-0x4df040780x0, quorum=localhost:2181, baseZNode=/hbase
16/12/22 12:42:04 INFO zookeeper.ClientCnxn: Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
16/12/22 12:42:04 INFO zookeeper.ClientCnxn: Socket connection established to localhost/127.0.0.1:2181, initiating session
16/12/22 12:42:04 INFO zookeeper.ClientCnxn: Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x5924755d380005, negotiated timeout = 90000
16/12/22 12:42:06 INFO zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x7979cd9c connecting to ZooKeeper ensemble=localhost:2181
16/12/22 12:42:06 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=localhost:2181 sessionTimeout=90000 watcher=hconnection-0x7979cd9c0x0, quorum=localhost:2181, baseZNode=/hbase
16/12/22 12:42:06 INFO zookeeper.ClientCnxn: Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
16/12/22 12:42:06 INFO zookeeper.ClientCnxn: Socket connection established to localhost/127.0.0.1:2181, initiating session
16/12/22 12:42:07 INFO zookeeper.ClientCnxn: Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x5924755d380006, negotiated timeout = 90000
16/12/22 12:42:07 WARN mapreduce.LoadIncrementalHFiles: Skipping non-directory hdfs://hadoop-master:9000/usr/hadoop/inpatient.tmp/_SUCCESS
16/12/22 12:42:08 INFO hfile.CacheConfig: CacheConfig:disabled
16/12/22 12:42:08 INFO mapreduce.LoadIncrementalHFiles: Trying to load hfile=hdfs://hadoop-master:9000/usr/hadoop/inpatient.tmp/pinfo/7ee330c0f66c4d36b5d614a337d3929f first=" last="B301150360"
16/12/22 12:42:08 INFO client.ConnectionManager$HConnectionImplementation: Closing master protocol: MasterService
16/12/22 12:42:08 INFO client.ConnectionManager$HConnectionImplementation: Closing zookeeper sessionid=0x5924755d380006
16/12/22 12:42:08 INFO zookeeper.ZooKeeper: Session: 0x5924755d380006 closed
16/12/22 12:42:08 INFO zookeeper.ClientCnxn: EventThread shut down
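Once completebulkload has moved the HFiles into the table's regions, the rows are visible immediately, with no further MapReduce pass over the data. A quick sanity check on the row count from the HBase shell (a minimal sketch; INTERVAL only controls how often progress is printed):

# Count rows in the HBase table, reporting progress every 100000 rows
echo "count 'inpatient', INTERVAL => 100000" | ./hbase shell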
3. Querying in Hive
hive> select * from inpatient limit 1;
OK
......
Time taken: 12.419 seconds, Fetched: 1 row(s)
hive> select count(*) from inpatient;
Query ID = hadoop_20161222114304_b247c745-a6ec-4e52-b76d-daefb657ac20
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1482371551462_0004, Tracking URL = http://hadoop-master:8088/proxy/application_1482371551462_0004/
Kill Command = /home/hadoop/Deploy/hadoop-2.5.2/bin/hadoop job -kill job_1482371551462_0004
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-12-22 11:44:22,634 Stage-1 map = 0%, reduce = 0%
2016-12-22 11:45:08,704 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 5.74 sec
2016-12-22 11:45:50,754 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 8.19 sec
MapReduce Total cumulative CPU time: 8 seconds 190 msec
Ended Job = job_1482371551462_0004
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 8.19 sec HDFS Read: 13353 HDFS Write: 7 SUCCESS
Total MapReduce CPU Time Spent: 8 seconds 190 msec
OK
115077
Time taken: 170.801 seconds, Fetched: 1 row(s)
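Both queries go through the HBaseStorageHandler: the limit 1 select is answered by a short HBase scan directly from the CLI, while count(*) launches a MapReduce job that scans the whole table, hence the 170 seconds for 115077 rows. The count, 115077, is the number of distinct row keys: the 115152 imported records include duplicate keys, which collapse into a single HBase row (the same figure appears as Combine output records / Reduce input groups in the bulk-load counters). For selective queries, an equality predicate on the key column can be pushed down to HBase as a row-key lookup rather than a full scan; a minimal sketch run from the command line (the key value B301150360 is taken from the bulk-load log above, purely for illustration):

# Point lookup by row key via the Hive CLI; Hive can push the key predicate down to HBase
hive -e "select NAME, BALANCE_COST from inpatient where PATIENT_NO = 'B301150360';"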