Hive 分区表 进行动态插入

创建分区表

spark.sql(          """            |create table mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts_partitions            |(            |sc_longitude double            |,sc_latitude double            |,sc_pci      double            |,sc_freq     double            |,scrsrp      double            |,scrsrq      double            |,nc1pci      double            |,nc1freq     double            |,nc1rsrp     double            |,nc1rsrq     double            |,nc2pci      double            |,nc2freq     double            |,nc2rsrp     double            |,nc2rsrq     double            |,nc3pci      double            |,nc3freq     double            |,nc3rsrp     double            |,nc3rsrq     double            |,sctadv      double            |,longitude   double            |,latitude double            |,time_stamp string            |,msisdn      string            |)            |partitioned by (key Int)            |ROW FORMAT DELIMITED            |FIELDS TERMINATED BY '\t'            |STORED AS TEXTFILE          """.stripMargin)

注意这里的分区字段不能在()中出现

动态插入

spark.sql(      """        |set hive.exec.dynamic.partition.mode=nonstrict        |set hive.exec.dynamic.partition=true      """.stripMargin)

这里是设置动态插入

spark.sql(        """          |insert overwrite table mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts_partitions          |PARTITION(key)          |select          |CAST( sc_longitude as DECIMAL(10,7)) as sc_longitude          |,CAST( sc_latitude as DECIMAL(10,7)) as sc_latitude          |,CAST( sc_pci as decimal) as sc_pci          |,CAST( sc_freq as  decimal ) as sc_freq          |,CAST( scrsrp as  decimal ) as scrsrp          |,CAST( scrsrq as  decimal ) as scrsrq          |,CAST( nc1pci as  decimal ) as nc1pci          |,CAST( nc1freq as  decimal ) as nc1freq          |,CAST( nc1rsrp as  decimal ) as nc1rsrp          |,CAST( nc1rsrq as  decimal ) as nc1rsrq          |,CAST( nc2pci as  decimal ) as nc2pci          |,CAST( nc2freq as  decimal ) as nc2freq          |,CAST( nc2rsrp as  decimal ) as nc2rsrp          |,CAST( nc2rsrq as  decimal ) as nc2rsrq          |,CAST( nc3pci as  decimal ) as nc3pci          |,CAST( nc3freq as  decimal ) as nc3freq          |,CAST( nc3rsrp as  decimal ) as nc3rsrp          |,CAST( nc3rsrq as  decimal ) as nc3rsrq          |,CAST(sctadv AS decimal)   as sctadv          |,case when instr(longitude ,'.')>0 then CAST(longitude AS DECIMAL(10,7))          |else (CAST(longitude AS DECIMAL(10,7))  * 360 / 16777216.0) end as longitude          |,case when instr(latitude ,'.')>0 then CAST( latitude as DECIMAL(10,7))          |else (CAST(latitude as DECIMAL(10,7)) * 90 / 8388608.0) end as latitude          |,time_stamp          |,msisdn          |,key(CAST( sc_longitude as DECIMAL(10,7)),CAST( sc_latitude as DECIMAL(10,7))) as key          |from mro_ns2_hive_db.tmp_mro_msisdn_njy_xgboots_105_ts          |where city_id=571          |distribute by cast(rand()*4 as int)        """.stripMargin)

来源:https://www.icode9.com/content-4-815151.html

(0)

相关推荐