2017-02-10 90 views
1

我想使用 XML SerDe 在 Hive 中從 XML 提取時間戳。外部表已經創建,並指向 HDFS 目錄。目前,時間戳的值在我的表中顯示爲空(NULL)。 — XML Hive SerDe 提取時間戳(Hadoop)

我猜時間戳需要做類型轉換(cast)?我不確定。其餘的 XML 信息都能正常解析,並在 Hive 中正確顯示。

輸入文件是:

<example> 
<date>2017-02-09 22:03:58</date>
</example> 

Hive 建表腳本:

-- External table `example` with a single column `date` of type timestamp,
-- read through the IBM XML SerDe from the files under LOCATION.
-- The column name is back-quoted because DATE is a reserved keyword in
-- Hive 1.2.0 and later, where the unquoted form is rejected by the parser.
-- "column.xpath.date" supplies the XPath expression used for the `date` column.
-- NOTE(review): "xmlinput.start" and "xmlinput.end" presumably mark where one
-- XML record begins and ends in the input. Confirm against the SerDe docs.
create external table example (
`date` timestamp
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
"column.xpath.date"="/example/date/text()"
)
STORED AS
INPUTFORMAT 'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION 'mypath'
TBLPROPERTIES (
"xmlinput.start"="<example>",
"xmlinput.end"="</example>"
);

回答

1

似乎只支持 Java 的基本類型(primitive types)以及字符串。
請查看 XmlUtils.java 文件中的 getPrimitiveValue 方法:其中沒有處理時間戳(timestamp)類別的分支,不支持的類別最終都會返回 null,所以該列顯示爲空。

/** 
* (c) Copyright IBM Corp. 2013. All rights reserved. 
* 
* Licensed under the Apache License, Version 2.0 (the "License"). 
* You may not use this file except in compliance with the License. 
* You may obtain a copy of the License at 
* 
* http://www.apache.org/licenses/LICENSE-2.0 
* 
* Unless required by applicable law or agreed to in writing, software 
* distributed under the License is distributed on an "AS IS" BASIS, 
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and 
* limitations under the License. 
*/ 

package com.ibm.spss.hive.serde2.xml.processor; 

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 

/**
 * Static helpers for converting XML text content into Java values.
 *
 * <p>This class only exposes static methods and cannot be instantiated.
 */
public class XmlUtils {

    /**
     * Private constructor; prevents instantiation of this utility class.
     */
    private XmlUtils() {
    }

    /**
     * Converts the string value to the Java object for the given primitive category.
     *
     * <p>Only {@code BOOLEAN}, {@code BYTE}, {@code DOUBLE}, {@code FLOAT}, {@code INT},
     * {@code LONG}, {@code SHORT} and {@code STRING} are converted. Every other category
     * (for example a timestamp column) produces {@code null}. Conversion problems are
     * reported as {@code null} rather than as an exception.
     *
     * @param value
     *            the text to convert; may be {@code null}
     * @param primitiveCategory
     *            the primitive category that selects the target Java type; may be {@code null}
     * @return the converted object, or {@code null} when either argument is {@code null},
     *         the category is not one of the supported ones, or the text is not a valid
     *         literal for the requested numeric type
     */
    public static Object getPrimitiveValue(String value, PrimitiveCategory primitiveCategory) {
        // A null category used to surface as a swallowed NullPointerException from the
        // switch; checking it up front keeps the same null result without the exception.
        if (value == null || primitiveCategory == null) {
            return null;
        }
        try {
            switch (primitiveCategory) {
                case BOOLEAN:
                    // Boolean.valueOf never throws: anything other than "true"
                    // (case-insensitive) becomes FALSE.
                    return Boolean.valueOf(value);
                case BYTE:
                    return Byte.valueOf(value);
                case DOUBLE:
                    return Double.valueOf(value);
                case FLOAT:
                    return Float.valueOf(value);
                case INT:
                    return Integer.valueOf(value);
                case LONG:
                    return Long.valueOf(value);
                case SHORT:
                    return Short.valueOf(value);
                case STRING:
                    return value;
                default:
                    // Unsupported category: return null directly instead of throwing an
                    // exception that would only be caught and discarded below.
                    return null;
            }
        } catch (NumberFormatException ignored) {
            // The text is not a valid literal for the requested numeric type; callers
            // receive null for such values.
            return null;
        }
    }

}