2017-02-19 76 views
0
''' <summary>
''' Reads lines from <paramref name="stream"/> into a list, optionally sampling
''' them at random and optionally skipping lines that were already collected.
''' </summary>
''' <param name="stream">Open text stream to read from; it is read line by line until its end or until <paramref name="limit"/> is reached.</param>
''' <param name="limit">Reading stops as soon as the result holds at least this many lines.</param>
''' <param name="unique">When True, lines are added through AddIfNotExist instead of Add.</param>
''' <param name="FirstSectionAsKey">Kept for caller compatibility. Both of its branches performed the same call, so it has no effect.</param>
''' <param name="prob">A line is kept only when Rnd() is less than this value; the default of 1 keeps every line.</param>
''' <returns>The collected lines, in the order they were read.</returns>
Public Function fileToColHarvest(ByRef stream As Scripting.TextStream, Optional ByRef limit As Integer = 2000000, Optional ByRef unique As Boolean = False, Optional ByRef FirstSectionAsKey As Boolean = False, Optional ByRef prob As Double = 1) As Generic.List(Of String)
    Dim harvested As New Generic.List(Of String)

    Do Until stream.AtEndOfStream
        ' Pump pending window messages so the UI stays responsive during a long read.
        System.Windows.Forms.Application.DoEvents()

        Dim line As String = stream.ReadLine

        ' Random sampling: Rnd() is drawn for every line, even when prob is 1.
        If Microsoft.VisualBasic.Rnd() < prob Then
            If unique Then
                ' AddIfNotExist is a helper defined outside this block;
                ' presumably it skips values already in the list - confirm.
                harvested.AddIfNotExist(line)
            Else
                harvested.Add(line)
            End If
        End If

        ' Checked after every line, so at least one line is read before stopping.
        If harvested.Count >= limit Then Exit Do
    Loop

    Return harvested
End Function

基本上我想擺脫Scripting.TextStream。如何最有效地將此代碼替換爲vb.net版本?

我也想逐行讀取文件。

+0

使用 `StreamReader` 及其 `ReadLineAsync` 方法來讀取並處理文件中的各行。參見 MSDN:[https://msdn.microsoft.com/en-us/library/yhfzs7at(v=vs.110).aspx](https://msdn.microsoft.com/en-us/library/yhfzs7at(v=vs.110).aspx) – Fabio

+0

1)將它寫入程序集。 2)寫下來,真的很快。 –

回答

3

要讀取文本行,您可以使用 StreamReader 及其異步的 ReadLineAsync 方法。
這種異步方式可以取代「醜陋」的 Application.DoEvents()。

''' <summary>
''' Asynchronously reads lines from a text file into a list, optionally sampling
''' them at random and optionally keeping only the first occurrence of each line.
''' </summary>
''' <param name="pathToFile">Path of the text file to read.</param>
''' <param name="limit">Reading stops as soon as the result holds at least this many lines.</param>
''' <param name="isUnique">When True, a line that was already collected is skipped.</param>
''' <param name="isFirstSectionAsKey">Kept for signature compatibility; it has no effect.</param>
''' <param name="prob">A line is skipped when this value is less than VBMath.Rnd(); pass 1 to keep every line.</param>
''' <returns>The collected lines, in file order.</returns>
Public Async Function FileToColHarvest(
    pathToFile As String,
    limit As Integer,
    isUnique As Boolean,
    isFirstSectionAsKey As Boolean,
    prob As Single) As Task(Of List(Of String))

    Dim lines = New List(Of String)()
    ' Used only for duplicate detection; the list above carries the order,
    ' because HashSet enumeration order is not guaranteed.
    Dim seenLines = New HashSet(Of String)()

    ' Open read-only (so read-only files work) and for asynchronous I/O.
    Using stream As New FileStream(pathToFile, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, useAsync:=True)
        Using reader As New StreamReader(stream)
            Do
                ' Await keeps the calling (UI) thread free while the line is read.
                ' ReadLineAsync returns Nothing at end of file, which avoids
                ' EndOfStream and its possible synchronous read.
                Dim line = Await reader.ReadLineAsync()
                If line Is Nothing Then Exit Do

                ' Random sampling.
                If prob < VBMath.Rnd() Then Continue Do

                ' HashSet.Add returns False when the line was already seen.
                If isUnique AndAlso Not seenLines.Add(line) Then Continue Do

                lines.Add(line)
                If lines.Count >= limit Then Exit Do
            Loop
        End Using
    End Using

    Return lines
End Function

題外話:isUnique 參數把這個方法分成了兩種不同的邏輯,所以我建議不要使用這個參數,而是提供兩個不同的方法:

FileToColHarvest(...) 
FileToColHarvestWithUniqueOnly(...)