2016-08-14 199 views
2

我是 Python 新手,想用 matplotlib 的 subplots 和 PdfPages 把大量數據的圖保存到一個 PDF 中。問題是,我遇到了一個不知道如何解決的瓶頸(bottleneck),代碼大致如下(標題:Python 將多個 subplot 圖形保存爲 PDF):

import matplotlib.pyplot as plt 
from matplotlib.backends.backend_pdf import PdfPages 

# Write one 2x3 grid of subplots per PDF page.
# NOTE: x1, y1, x2 and y2 stand in for the asker's data; they are not
# defined anywhere in this snippet.
with PdfPages('myfigures.pdf') as pdf:
    for i in range(1000):
        # Create the figure exactly once per page. A second bare
        # plt.subplots(2, 3) call here would build an extra figure that
        # is never drawn on or saved.
        f, axarr = plt.subplots(2, 3)
        axarr[0, 0].plot(x1, y1)
        axarr[1, 0].plot(x2, y2)

        pdf.savefig(f)
        # Close the saved figure so open figures do not pile up.
        plt.close(f)

在每次循環中創建圖形(figure)非常費時,但如果把創建放到循環之外,每個子圖就不會被清除。我嘗試過的其他方法(如 clear() 或 clf())要麼不起作用,要麼最終創建出多個不同的圖形。有沒有人知道如何換一種寫法讓它更快?

回答

5

要把大量子圖作爲多頁輸出放進同一個 PDF,只需在檢測到最新添加的子圖已經用完當前頁子圖數組的可用空間時,立即創建一個新的頁面佈局。下面是一種做法,其中控制每頁子圖數量的尺寸(n 和 m)可以很容易地修改:

import matplotlib 
import matplotlib.pyplot as plt 
import numpy as np 
import sys 
import timeit 
from matplotlib.backends.backend_pdf import PdfPages 
matplotlib.rcParams.update({'font.size': 6}) 

# Dimensions for any n-rows x m-cols array of subplots/pg.
n, m = 4, 5

# Don't forget to indent after the with statement
with PdfPages('auto_subplotting.pdf') as pdf:

    # Let's time the execution required to create and save
    # each full page of subplots to the pdf
    start_time = timeit.default_timer()

    # Before beginning the iteration through all the data,
    # initialize the layout for the plots and flatten the grid of axes
    # (row-major order) so one counter can walk through the page and
    # tell us when to create the next one.
    # squeeze=False keeps axarr 2-D even when n or m is 1.
    f, axarr = plt.subplots(n, m, sharex='col', sharey='row', squeeze=False)
    subplots = list(axarr.flat)

    # To conserve needed plotting real estate,
    # only label the bottom row and leftmost subplots
    # as determined automatically using n and m
    splot_index = 0
    for s, splot in enumerate(subplots):
        splot.set_ylim(0, .15)
        splot.set_xlim(0, 50)
        last_row = (s >= (n - 1) * m)
        first_in_row = (s % m == 0)
        if last_row:
            splot.set_xlabel("X-axis label")
        if first_in_row:
            splot.set_ylabel("Y-axis label")

    # Iterate through each sample in the data
    for sample in range(33):

        # As a stand-in for real data, let's just make numpy take 100 random
        # draws from a poisson distribution centered around say ~25 and then
        # display the outcome as a histogram.
        scaled_y = np.random.randint(20, 30)
        random_data = np.random.poisson(scaled_y, 100)
        # density=True replaces the `normed=True` keyword, which current
        # matplotlib releases no longer accept.
        subplots[splot_index].hist(random_data, bins=12, density=True,
                                   fc=(0, 0, 0, 0), lw=0.75, ec='b')

        # Keep subplotting through the samples in the data and increment
        # a counter each time. The page will be full once the count is equal
        # to the product of the user-set dimensions (i.e. n * m)
        splot_index += 1

        # We can basically repeat the same exact code block used for the
        # first layout initialization, but with the addition of 4 new lines:
        # 2 for saving the just-finished page to the pdf, 1 for the
        # page's execution time, & 1 more to reset the subplot index
        if splot_index == n * m:
            pdf.savefig()
            plt.close(f)
            print(timeit.default_timer() - start_time)
            start_time = timeit.default_timer()
            f, axarr = plt.subplots(n, m, sharex='col', sharey='row',
                                    squeeze=False)
            subplots = list(axarr.flat)
            splot_index = 0
            for s, splot in enumerate(subplots):
                splot.set_ylim(0, .15)
                splot.set_xlim(0, 50)
                last_row = (s >= (n - 1) * m)
                first_in_row = (s % m == 0)
                if last_row:
                    splot.set_xlabel("X-axis label")
                if first_in_row:
                    splot.set_ylabel("Y-axis label")

    # Done!
    # But don't forget the last, partially filled page. When the samples
    # filled the previous page exactly, the current page is empty, so skip
    # saving it rather than appending a blank page to the pdf.
    if splot_index > 0:
        pdf.savefig()
    plt.close(f)

若要使用 2×3 的佈局,只需在代碼塊開頭相應地修改 n 和 m 的聲明。作爲更緊湊佈局的一個例子,上面的 4×5 矩陣在繪製 33 個樣本時給出了以下兩頁輸出:

可以看到,第二頁上爲整個佈局都繪製了座標軸邊框,但最後 7 個子圖是空白的(4×5×2 = 40,40 − 33 = 7)。

timeit 的打印輸出顯示,創建第一頁耗時 1.81540203094 秒。


注: 多頁的處理也許應該通過創建一個 new_page 函數來簡化;最好不要逐字重複代碼,尤其是當你開始自定義圖形時——你不會想把每一處改動都同步兩遍、把同樣的內容輸入兩次。以 seaborn 爲基礎、並利用如下所示的 matplotlib 可用參數來做更加定製化的外觀,也許會更好。

添加 new_page 函數,並對子圖樣式做一些自定義:

from __future__ import print_function 
import matplotlib.pyplot as plt 
import numpy as np 
import random 
import seaborn as sns 
import timeit 
from matplotlib.backends.backend_pdf import PdfPages 

# Per the original author's note: a zero font scale erases labels for any
# blank plots on the last page.
sns.set(font_scale=0.0)

# Page grid: n rows by m columns of subplots.
n = 4
m = 6

# Number of samples to plot; also the number of colors blended below.
datasize = 37

# Anchor colors that the palette is blended across, in order.
ctheme = [
    'k',
    'gray',
    'magenta',
    'fuchsia',
    '#be03fd',
    '#1e488f',
    (0.44313725490196076, 0.44313725490196076, 0.88627450980392153),
    '#75bbfd',
    'teal',
    'lime',
    'g',
    (0.6666674, 0.6666663, 0.29078014184397138),
    'y',
    '#f1da7a',
    'tan',
    'orange',
    'maroon',
    'r',
]
colors = sns.blend_palette(ctheme, n_colors=datasize)

# labels fontsize
fz = 7

def new_page(n, m):
    """Create a fresh page of n x m subplots and reset the subplot counter.

    Every axes gets a light major grid and fixed x/y limits. Only the
    bottom row is given an x-axis label and only the first column a
    y-axis label.

    Side effect: resets the module-level ``splot_index`` to 0 so the
    caller starts filling the new page from its first subplot.

    Args:
        n: Number of subplot rows on the page.
        m: Number of subplot columns on the page.

    Returns:
        A ``(fig, subplots)`` tuple: the new figure and its axes as a
        flat list in row-major order.
    """
    global splot_index
    splot_index = 0
    # squeeze=False keeps axarr 2-D even when n or m is 1, so the
    # flattening below works for every grid shape.
    fig, axarr = plt.subplots(n, m, sharey='row', squeeze=False)
    plt.subplots_adjust(hspace=0.5, wspace=0.15)
    subplots = list(axarr.flat)
    for s, splot in enumerate(subplots):
        # Pass the on/off flag positionally: its keyword was renamed from
        # `b` to `visible` across matplotlib releases.
        splot.grid(True, which='major', color='gray', linestyle='-',
                   alpha=0.25, zorder=1, lw=0.5)
        splot.set_ylim(0, .15)
        splot.set_xlim(0, 50)
        last_row = (s >= (n - 1) * m)
        first_in_row = (s % m == 0)
        if last_row:
            splot.set_xlabel("X-axis label", labelpad=8, fontsize=fz)
        if first_in_row:
            splot.set_ylabel("Y-axis label", labelpad=8, fontsize=fz)
    return fig, subplots


# Plot every sample as a histogram, n*m subplots per page, writing each
# finished page to the pdf and printing how long that page took.
with PdfPages('auto_subplotting_colors.pdf') as pdf:
    start_time = timeit.default_timer()
    fig, subplots = new_page(n, m)

    # range (not the Python 2-only xrange) runs on both Python 2 and 3.
    for sample in range(datasize):
        splot = subplots[splot_index]
        splot_index += 1
        scaled_y = np.random.randint(20, 30)
        random_data = np.random.poisson(scaled_y, 100)
        # density=True replaces the `normed=True` keyword, which current
        # matplotlib releases no longer accept. Each sample takes the next
        # color off the end of the blended palette.
        splot.hist(random_data, bins=12, density=True, zorder=2, alpha=0.99,
                   fc='white', lw=0.75, ec=colors.pop())
        splot.set_title("Sample {}".format(sample+1), fontsize=fz)
        # tick fontsize & spacing
        splot.xaxis.set_tick_params(pad=4, labelsize=6)
        splot.yaxis.set_tick_params(pad=4, labelsize=6)

        # make new page:
        if splot_index == n*m:
            pdf.savefig()
            plt.close(fig)
            print(timeit.default_timer()-start_time)
            start_time = timeit.default_timer()
            fig, subplots = new_page(n, m)

    # Save the last page only if it holds at least one plot.
    if splot_index > 0:
        pdf.savefig()
        # The figure in this script is named `fig`; there is no `f` here.
        plt.close(fig)


這一次,第一頁耗時 2.51897096634 秒:

+0

哇,有點大材小用,但非常感謝你,它運行得很好。我可以問一下這段代碼是否來自某個網頁嗎?這非常有趣 – Victor

+0

沒問題 - 只是想做得徹底一些,其實我在工作中一直在用它,所以這對我來說也是一次很好的複習。謝謝!這段代碼是我爲分析 DNA 測序結果而寫的,這些結果需要各種非典型的定製格式,而且我希望用更加定製化、不那麼像 matplotlib 默認風格的方式呈現可用於展示的圖形。 – johnxcollins