# Sample input: whitespace-separated records, one per line (read below as ticker, date, time, value).
In [1]: data = """VOD 02/25/2013 00:00:00.000 0
VOD 02/25/2013 00:01:00.000 0
VOD 02/25/2013 00:02:00.000 0
VOD 02/25/2013 00:03:00.000 0
VOD 02/25/2013 00:04:00.000 0
VOD 02/25/2013 00:05:00.000 0
VOD 02/25/2013 00:06:00.000 0 """
# Parse the whitespace-delimited sample text into four named columns.
# The separator is a raw string so the regex \s+ is not treated as an (invalid) string escape sequence.
In [2]: df = pd.read_csv(StringIO(data), sep=r'\s+', names=['ticker', 'date', 'time', 'value'])
# Repeat the small frame 200,000 times to build a large benchmark input; the original index labels repeat.
In [3]: df2 = pd.concat([df]*100000*2)
In [4]: df2.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1400000 entries, 0 to 6
Data columns (total 4 columns):
ticker 1400000 non-null object
date 1400000 non-null object
time 1400000 non-null object
value 1400000 non-null int64
dtypes: int64(1), object(3)
memory usage: 53.4+ MB
# Approach 1: join the date and time strings, then parse them in one call with an explicit format.
In [5]: result1 = pd.to_datetime(df2['date'] + ' ' + df2['time'],format='%m/%d/%Y %H:%M:%S.%f')
# Approach 2: parse the date column alone, then add the time column converted to timedeltas.
In [6]: result2 = pd.to_datetime(df2['date'], format="%m/%d/%Y") + pd.to_timedelta(df2['time'])
result1
# Check that both approaches produce the same Series.
In [7]: result1.equals(result2)
Out[7]: True
# Preview the first rows of the combined datetime Series.
In [9]: result1.head()
Out[9]:
0 2013-02-25 00:00:00
1 2013-02-25 00:01:00
2 2013-02-25 00:02:00
3 2013-02-25 00:03:00
4 2013-02-25 00:04:00
dtype: datetime64[ns]
這裏有兩種方法。在這種情況下,一次性解析完整的日期時間字符串(即下面的 In [4])更划算。請注意,這些計時是在 pandas 的 master 分支上測得的;在 0.16.2 版本中,timedelta 的解析會稍慢一些。
# Timing: parse the dates, then add the times converted to timedeltas.
In [5]: %timeit pd.to_datetime(df2['date'], format="%m/%d/%Y") + pd.to_timedelta(df2['time'])
1 loops, best of 3: 9.76 s per loop
# Timing: concatenate the date and time strings and parse them in a single call.
In [4]: %timeit pd.to_datetime(df2['date'] + ' ' + df2['time'],format='%m/%d/%Y %H:%M:%S.%f')
1 loops, best of 3: 8.81 s per loop