|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Classification
{
/// <summary>
/// Computes a syntactic text change range that determines the range of a document that was changed by an edit. The
/// portions outside this change range are guaranteed to be syntactically identical (see <see
/// cref="SyntaxNode.IsIncrementallyIdenticalTo"/>). This algorithm is intended to be <em>fast</em>. It is
/// technically linear in the number of nodes and tokens that may need to examined. However, in practice, it should
/// operate in sub-linear time as it will bail the moment tokens don't match, and it's able to skip over matching
/// nodes fully without examining the contents of those nodes. This is intended for consumers that want a
/// reasonably accurate change range computer, but do not want to spend an inordinate amount of time getting the
/// most accurate and minimal result possible.
/// </summary>
/// <remarks>
/// This computation is not guaranteed to be minimal. It may return a range that includes parts that are unchanged.
/// This means it is also legal for the change range to just specify the entire file was changed. The quality of
/// results will depend on how well the parsers did with incremental parsing, and how much time is given to do the
/// comparison. In practice, for large files (i.e. 15kloc) with standard types of edits, this generally returns
/// results in around 50-100 usecs on a i7 3GHz desktop.
/// <para>
/// This algorithm will respect the timeout provided to the best of abilities. If any information has been computed
/// when the timeout elapses, it will be returned.
/// </para>
/// </remarks>
internal static class SyntacticChangeRangeComputer
{
private static readonly ObjectPool<Stack<SyntaxNodeOrToken>> s_pool = new(() => new());
public static TextChangeRange ComputeSyntacticChangeRange(SyntaxNode oldRoot, SyntaxNode newRoot, TimeSpan timeout, CancellationToken cancellationToken)
{
if (oldRoot == newRoot)
return default;
var stopwatch = SharedStopwatch.StartNew();
// We will be comparing the trees for two documents like so:
//
// --------------------------------------------
// old: | |
// --------------------------------------------
//
// ---------------------------------------------------
// new: | |
// ---------------------------------------------------
//
// (Note that `new` could be smaller or the same length as `old`, it makes no difference).
//
// The algorithm will sweep in from both sides, as long as the nodes and tokens it's touching on each side
// are 'identical' (i.e. are the exact same green node, and were thus reused over an incremental parse.).
// This will leave us with:
//
// --------------------------------------------
// old: | CLW | | CRW |
// --------------------------------------------
// | | \ \
// ---------------------------------------------------
// new: | CLW | | CRW |
// ---------------------------------------------------
//
// Where CLW and CRW refer to the common-left-width and common-right-width respectively. The part in between
// this s the change range:
//
// --------------------------------------------
// old: | |**********************| |
// --------------------------------------------
// |**************************\
// ---------------------------------------------------
// new: | |*****************************| |
// ---------------------------------------------------
//
// The Span changed will go from `[CLW, Old_Width - CRW)`, and the NewLength will be `New_Width - CLW - CRW`
var commonLeftWidth = ComputeCommonLeftWidth();
if (commonLeftWidth == null)
{
// The trees were effectively identical (even if the children were different). Return that there was no
// text change.
return default;
}
// Only compute the right side if we have time for it. Otherwise, assume there is nothing in common there.
var commonRightWidth = 0;
if (stopwatch.Elapsed < timeout)
commonRightWidth = ComputeCommonRightWidth();
var oldRootWidth = oldRoot.FullWidth();
var newRootWidth = newRoot.FullWidth();
Contract.ThrowIfTrue(commonLeftWidth > oldRootWidth);
Contract.ThrowIfTrue(commonLeftWidth > newRootWidth);
Contract.ThrowIfTrue(commonRightWidth > oldRootWidth);
Contract.ThrowIfTrue(commonRightWidth > newRootWidth);
// it's possible for the common left/right to overlap. This can happen as some tokens
// in the parser have a true shared underlying state, so they may get consumed both on
// a leftward and rightward walk. Cap the right width so that it never overlaps hte left
// width in either the old or new tree.
commonRightWidth = Math.Min(commonRightWidth, oldRootWidth - commonLeftWidth.Value);
commonRightWidth = Math.Min(commonRightWidth, newRootWidth - commonLeftWidth.Value);
return new TextChangeRange(
TextSpan.FromBounds(start: commonLeftWidth.Value, end: oldRootWidth - commonRightWidth),
newRootWidth - commonLeftWidth.Value - commonRightWidth);
int? ComputeCommonLeftWidth()
{
using var leftOldStack = s_pool.GetPooledObject();
using var leftNewStack = s_pool.GetPooledObject();
var oldStack = leftOldStack.Object;
var newStack = leftNewStack.Object;
oldStack.Push(oldRoot);
newStack.Push(newRoot);
while (oldStack.Count > 0 && newStack.Count > 0)
{
cancellationToken.ThrowIfCancellationRequested();
var currentOld = oldStack.Pop();
var currentNew = newStack.Pop();
Contract.ThrowIfFalse(currentOld.FullSpan.Start == currentNew.FullSpan.Start);
// If the two nodes/tokens were the same just skip past them. They're part of the common left width.
if (currentOld.IsIncrementallyIdenticalTo(currentNew))
continue;
// if we reached a token for either of these, then we can't break things down any further, and we hit
// the furthest point they are common.
if (currentOld.IsToken || currentNew.IsToken)
return currentOld.FullSpan.Start;
// Similarly, if we've run out of time, just return what we've computed so far. It's not as accurate as
// we could be. But the caller wants the results asap.
if (stopwatch.Elapsed > timeout)
return currentOld.FullSpan.Start;
// we've got two nodes, but they weren't the same. For example, say we made an edit in a method in the
// class, the class node would be new, but there might be many member nodes that were the same that we'd
// want to see and skip. Crumble the node and deal with its left side.
//
// Reverse so that we process the leftmost child first and walk left to right.
foreach (var nodeOrToken in currentOld.AsNode()!.ChildNodesAndTokens().Reverse())
oldStack.Push(nodeOrToken);
foreach (var nodeOrToken in currentNew.AsNode()!.ChildNodesAndTokens().Reverse())
newStack.Push(nodeOrToken);
}
// If we consumed all of 'new', then the length of the new doc is what we have in common.
if (oldStack.Count > 0)
return newRoot.FullSpan.Length;
// If we consumed all of 'old', then the length of the old doc is what we have in common.
if (newStack.Count > 0)
return oldRoot.FullSpan.Length;
// We consumed both stacks entirely. That means the trees were identical (though the root was different). Return null to signify no change to the doc.
return null;
}
int ComputeCommonRightWidth()
{
using var rightOldStack = s_pool.GetPooledObject();
using var rightNewStack = s_pool.GetPooledObject();
var oldStack = rightOldStack.Object;
var newStack = rightNewStack.Object;
oldStack.Push(oldRoot);
newStack.Push(newRoot);
while (oldStack.Count > 0 && newStack.Count > 0)
{
cancellationToken.ThrowIfCancellationRequested();
var currentOld = oldStack.Pop();
var currentNew = newStack.Pop();
// The width on the right we've moved past on both old/new should be the same.
Contract.ThrowIfFalse((oldRoot.FullSpan.End - currentOld.FullSpan.End) ==
(newRoot.FullSpan.End - currentNew.FullSpan.End));
// If the two nodes/tokens were the same just skip past them. They're part of the common right width.
// Critically though, we can only skip past if this wasn't already something we consumed when determining
// the common left width. If this was common the left side, we can't consider it common to the right,
// otherwise we could end up with overlapping regions of commonality.
//
// This can occur in incremental settings when the similar tokens are written successsively.
// Because the parser can reuse underlying token data, it may end up with many incrementally
// identical tokens in a row.
if (currentOld.IsIncrementallyIdenticalTo(currentNew) &&
currentOld.FullSpan.Start >= commonLeftWidth &&
currentNew.FullSpan.Start >= commonLeftWidth)
{
continue;
}
// if we reached a token for either of these, then we can't break things down any further, and we hit
// the furthest point they are common.
if (currentOld.IsToken || currentNew.IsToken)
return oldRoot.FullSpan.End - currentOld.FullSpan.End;
// Similarly, if we've run out of time, just return what we've computed so far. It's not as accurate as
// we could be. But the caller wants the results asap.
if (stopwatch.Elapsed > timeout)
return oldRoot.FullSpan.End - currentOld.FullSpan.End;
// we've got two nodes, but they weren't the same. For example, say we made an edit in a method in the
// class, the class node would be new, but there might be many member nodes following the edited node
// that were the same that we'd want to see and skip. Crumble the node and deal with its right side.
//
// Do not reverse the children. We want to process the rightmost child first and walk right to left.
foreach (var nodeOrToken in currentOld.AsNode()!.ChildNodesAndTokens())
oldStack.Push(nodeOrToken);
foreach (var nodeOrToken in currentNew.AsNode()!.ChildNodesAndTokens())
newStack.Push(nodeOrToken);
}
// If we consumed all of 'new', then the length of the new doc is what we have in common.
if (oldStack.Count > 0)
return newRoot.FullSpan.Length;
// If we consumed all of 'old', then the length of the old doc is what we have in common.
if (newStack.Count > 0)
return oldRoot.FullSpan.Length;
// We consumed both stacks entirely. That means the trees were identical (though the root was
// different). We should never get here. If we were the same, then walking from the left should have
// consumed everything and already bailed out.
throw ExceptionUtilities.Unreachable();
}
}
}
}
|