處理大型數據集時，排序算法會導致堆棧溢出？

我正在尋找排序下列類型數據的更好方法。處理較大的處理大型數據集時，排序算法會導致堆棧溢出？

保存數據看起來像這樣

public class AttributeItem 
{ 
    public string AttributeType { get; set; } 
    public string Title { get; set; } 
    public string Value { get; set; } 
    public int ObjectID { get; set; } 
    public bool CanModify { get; set; } 
    public bool CanDelete { get; set; } 
    public bool? IsParent { get; set; } 
    public int SortID { get; set; } 
    public int? ParentSortID { get; set; } 
    public bool Deleted { get; set; } 
} 

public class AttributeItemNode 
{ 
    public AttributeItem Item {get;set;} 
    public int Depth {get;set;} 

    public AttributeItemNode(AttributeItem item , int Depth) 
    { 
     this.Item = item ; 
     this.Depth = Depth; 
    } 
}

這裏是到位的結構時，下面的工作正常較小的數據集（在某些系統上它的2000年其他9000），但會導致計算器一個數據的例子需要用一個int來表示它們的深度，並將其分類到一個單獨的對象中。有可能爲孩子的水平去比示例數據

var items = new List<AttributeItem>(); 
items.Add(new AttributeItem{Title ="Parent1", ObjectID=1,SortID =1, IsParent= true, ParentSortID = Int32.MinValue}); 

items.Add(new AttributeItem{Title ="FooChild", ObjectID=2,SortID =2, IsParent= false, ParentSortID = 1}); 

items.Add(new AttributeItem{Title ="Parent2", ObjectID=4,SortID =4, IsParent= true, ParentSortID = Int32.MinValue}); 

items.Add(new AttributeItem{ Title ="Parent2Child1", ObjectID=5,SortID =5, IsParent= false, ParentSortID = 4}); 

items.Add(new AttributeItem{Title ="Parent2Child2", ObjectID=7,SortID =7, IsParent= false, ParentSortID = 4}); 

items.Add(new AttributeItem{Title ="Parent2Child2Child1", ObjectID=6,SortID =6, IsParent= false, ParentSortID = 5});

預期產出將是如下所示的三個層次更深（我從對象中刪除無關的數據，以幫助可讀性）

Depth = 0 Title ="Parent1" 
Depth = 1 Title ="FooChild" 
Depth = 0 Title ="Parent2" 
Depth = 1 Title ="Parent2Child1" 
Depth = 2 Title ="Parent2Child2Child1" 
Depth = 1 Title ="Parent2Child2"

下面是實際的排序代碼

public static IList<AttributeItemNode> SortAttributeItems(IList<AttributeItem> list) 
    { 
     List<AttributeItemNode> newList = new List<AttributeItemNode>(); 
     SortAttributeItems(list, null, 0, newList); 

     return newList; 
    } 

    private static void SortAttributeItems(IList<AttributeItem> list, AttributeItem currentItem, int depth, List<AttributeItemNode> newList) 
    { 
     AttributeItem newItem = null; 
     // look for children 
     if (currentItem != null) 
     { 
      foreach (AttributeItem item in list) 
      { 
       if (item.ParentSortID.HasValue && item.ParentSortID.Value != Int32.MinValue && item.ParentSortID.Value == currentItem.SortID) 
       { 
        newList.Add(new AttributeItemNode(item, (depth + 1))); 
        SortAttributeItems(list, item, depth + 1, newList); 
       } 
      } 
     } 

     if (depth == 0) 
     { 
      foreach (AttributeItem item in list) 
      { 
       if (!item.ParentSortID.HasValue || item.ParentSortID.Value == Int32.MinValue) 
       { 
        if (currentItem == null || item.SortID >= currentItem.SortID) 
        { 
         if (newItem == null || newItem.SortID >= item.SortID) 
         { 
          newItem = item; 
         } 
        } 
       } 
      } 
     } 

     if (newItem != null) 
     { 
      newList.Add(new AttributeItemNode(newItem, depth)); 
      list.Remove(newItem); 
      SortAttributeItems(list, newItem, depth, newList); 
     } 

    }

來源

2016-10-01 Kevin Kunderman

我看到你的觀點，但在這個問題說明。它會因大量數據而失敗。所以有更多的只是尋找優化。如果它運行速度很慢，數據量較大，那麼我會同意。 –

然後編輯你的標題，詢問*改進*以外的其他內容，這意味着*做出更好的效果*，並且您的第一段提出要求*更好*，這也表示*它可以工作，但我想改進它*。 –

謝謝。我的近身投票已被撤回。 :-) –

該問題可以通過遞歸有效地解決。它可以分成兩部分 - 創建樹結構並使用迭代pre-order Depth First Traversal對樹進行平坦化，對每個級別進行排序。

對於第一部分，我們可以使用LINQ ToLookup方法在O（N）時間內創建ParentSortID的快速查找結構。

對於第二部分，繼DRY原理，我將通過創建允許從項目和深度投射到自定義結果過載使用通用方法從我的回答How to flatten tree via LINQ?（正如你可以看到我已經有）：

public static class TreeUtils 
{ 
    public static IEnumerable<TResult> Expand<T, TResult>(
     this IEnumerable<T> source, Func<T, IEnumerable<T>> elementSelector, Func<T, int, TResult> resultSelector) 
    { 
     var stack = new Stack<IEnumerator<T>>(); 
     var e = source.GetEnumerator(); 
     try 
     { 
      while (true) 
      { 
       while (e.MoveNext()) 
       { 
        var item = e.Current; 
        yield return resultSelector(item, stack.Count); 
        var elements = elementSelector(item); 
        if (elements == null) continue; 
        stack.Push(e); 
        e = elements.GetEnumerator(); 
       } 
       if (stack.Count == 0) break; 
       e.Dispose(); 
       e = stack.Pop(); 
      } 
     } 
     finally 
     { 
      e.Dispose(); 
      while (stack.Count != 0) stack.Pop().Dispose(); 
     } 
    } 
}

這裏是方法的問題實施：

public static IList<AttributeItemNode> SortAttributeItems(IList<AttributeItem> list) 
{ 
    var childrenMap = list.ToLookup(e => e.ParentSortID ?? int.MinValue); 
    return childrenMap[int.MinValue].OrderBy(item => item.SortID) 
     .Expand(parent => childrenMap[parent.SortID].OrderBy(item => item.SortID), 
      (item, depth) => new AttributeItemNode(item, depth)) 
     .ToList(); 
}

來源

2016-10-01 16:00:13

非常好的解決方案，謝謝 –

懷疑，在'Expand'方法調用中（item，depth）=> new AttributeItemNode（item，depth）'，那麼深度值來自哪裏，因爲它是

@MrinalKamboj它來自'Expand'方法（類似於遞歸方法中的'depth'變量）。由於我在裏面使用了一個顯式的'stack'，'stack.Count'是當前的深度。 –

任何原因，你不能簡單地跟隨父指針來計算的深度？

如果你把它放在一個Dictionary<int,AttributeItem> map與SortId爲重點，你現在可以把每AttributeItem item做：

int depth = 0; 
var current = item; 
while (!current.IsParent) 
{ 
    depth++; 
    current = map[current.ParentSortId; 
}

如果使用的是許多的NuGet包樹或者你可以這樣做圖的一個以及許多其他對您的數據的圖表操作，包括檢查它是否有效並且不包含循環。

最好不要讓相同的信息代表兩種方式：您有IsParent，但您也有ParentSortId上的標記值。如果這些不同意呢？等等。

來源

2016-10-01 03:11:29

您可以通過遍歷到「Parent」來爲子級分配深度，但仍然需要遞歸才能按照預期創建層級。實際上，我所提供的解決方案使用了一個類似的基於字典的策略 –

public class AttributeItemNode : IComparable<AttributeNode> { 

    public int CompareTo(AttributeItemNode other) { 
     // compare the Ids in appropriate order 
    } 
} 

public class NodeCollection { 
    protected List<AttributeItemNode> nodes; 

    public void AddNode() { } 

    public void Sort() { 
     nodes.Sort(); 
     this.CalcDepth(); 
    } 

    protected void CalcDepth { 
     foreach (var node in nodes) 
      if (node.IsParent) { node.Depth = 0; break; } 

      //use the various Ids that are now in sorted order 
      // and calculate the item's Depth. 
    } 
}

一個AttributeItem已經有它所需的所有排序。使用IsParent（可能是？），SortId和ParentSortId來實現上面的CompareTo()。

Calc深度只有在排序後，才能避免遞歸。

然後：

myNodeCollection.Sort()

List.Sort() .NET智能決定使用哪幾種排序算法。

來源

2016-10-01 04:54:49 radarbob

這肯定不會解決OP所要求的，在'AttributeItem List'中將Children鏈接到父項的第一部分沒有完成，然後OP要求列出項目在特定的子層次結構中，而不是通用的排序順序，這就是你的解決方案使用IComparable 實現的功能，因爲它不能爲每個子部分執行 –

問題是關於避免由遞歸引起的堆棧溢出和計算深度。按鍵順序是所有需要的。如果需要某種樹或鏈表，那麼只需遍歷排序後的列表並構建它。即使這樣，也避免了遞歸。那麼這個解決方案有可能處理大量數據。 – radarbob

請嘗試根據您的建議實施解決方案，您將瞭解哪裏無法按預期工作，請嘗試OP提供的數據。通過使用'IComparer'排序'Parents'和'Children'的排列過於簡化了問題，它不會導致預期的Tree結構 –

退房代碼的修改後的版本，我用一個字典映射父母和子女和遞歸以獲得預期的結果（在我這是更清潔和更容易管理版本）

重要假設

所有子元素，除了頂層父母有ParentSortId
的ObjectID是一個給定元素的標識符
ParentSortID是父

重要修改的的ObjectID（識別符）對於較深的遞歸級別

正如可以看出的遞歸方法正在List<AttributeItemNode>和Dictionary<int?, List<AttributeItem>>作爲輸入，我們可以通過使它們成爲標準類變量來避免它們，否則它們會增加每個遞歸c的大小所有這些都會導致更深層次的遞歸問題，從而導致StackOverflow異常。
到PrimarySortMethod，在結果的問題只是供顯示List<AttributeItem>，我已經測試其工作

主要排序方法

private static List<AttributeItemNode> SortAttributeItems(List<AttributeItem> attributeItemList) 
{ 
    // Final Result list 
    List<AttributeItemNode> returnAttributeItemNodeList = new List<AttributeItemNode>(); 

    // Item dictionary to map the children of each AttributeItem 
    var attributeItemDictionary = new Dictionary<int?, List<AttributeItem>>(); 

    // Only Main/Top Level Parents (depth = 0) 
    var parentsMainLevelAttributeItems = attributeItemList.Where(i => i.ParentSortID == Int32.MinValue); 

    // Create a Dictionary mapping/grouping between Parents and Children 
    foreach (var item in attributeItemList) 
    { 
     if (item.ParentSortID != Int32.MinValue) 
     { 
      if (attributeItemDictionary.ContainsKey(item.ParentSortID)) 
       attributeItemDictionary[item.ParentSortID].Add(item); 
      else 
       attributeItemDictionary.Add(item.ParentSortID, new List<AttributeItem> { item }); 
     } 
    } 

    // Add the parents and their children to the result list 
    foreach (var mainItem in parentsMainLevelAttributeItems) 
    { 
     returnAttributeItemNodeList.Add(new AttributeItemNode(mainItem,0)); 

     SortAttributeItems(returnAttributeItemNodeList,attributeItemDictionary,mainItem.ObjectID,0); 
    } 

    return (returnAttributeItemNodeList); 
}

遞歸排序方法

private static void SortAttributeItems(List<AttributeItemNode> attributeItemNodeList, 
             Dictionary<int?, List<AttributeItem>> attributeItemDictionary, 
             int currentParentId, 
             int depth) 
{ 
    // Traverse through all the Children and Add them Recursively 
    foreach (var childItem in attributeItemDictionary[currentParentId]) 
    { 
     attributeItemNodeList.Add(new AttributeItemNode(childItem,depth+1)); 

     if(!attributeItemDictionary.ContainsKey(childItem.ObjectID) || !attributeItemDictionary[childItem.ObjectID].Any()) 
      break; 
     else 
      SortAttributeItems(attributeItemNodeList,attributeItemDictionary,childItem.ObjectID,depth+1); 
    } 
}

來源

2016-10-01 10:05:28

處理大型數據集時，排序算法會導致堆棧溢出？

回答

相關問題