Eric Lynch
Отвечаю с большим опозданием, но мне стало любопытно, что покажет чуть более тщательное исследование. Способ анализа/отбора элементов для такого небольшого набора (8K элементов) и такого простого фильтра (начинается с "a") практически не должен иметь значения. Даже на наборе из 100 миллионов элементов худший результат, которого мне удалось добиться, составил около 2 секунд.
Вместо этого гораздо более вероятно, что ваша проблема связана с генерацией/чтением ваших элементов.
Предполагаю, что у вас исключительно медленное средство чтения CSV. Даже для сравнительно небольшого набора из 8K элементов, если только на каждый элемент не приходится огромное количество символов, я ожидал бы высокой производительности даже от самого плохого (из известных мне) средства чтения CSV.
Ниже приведены результаты и тестовая программа...
Результаты:
StartsWithA: 00:00:31.3843456
GetMatchesIndexCount: 00:00:00.4149453
GetMatchesIndexListAdHoc: 00:00:00.4930803
GetMatchesIndexListPreAllocated: 00:00:00.4762712
GetMatchesIndexListTruncated: 00:00:00.4896025
GetMatchesForeachCount: 00:00:00.4298655
GetMatchesForeachListAdHoc: 00:00:00.4599720
GetMatchesForeachListPreAllocated: 00:00:00.4488830
GetMatchesForeachListTruncated: 00:00:02.0583127
GetMatchesLinqArray: 00:00:00.5453610
GetMatchesLinqList: 00:00:00.4848105
Программа:
using System;
using System.Linq;
using System.Text;
using System.Collections.Generic;
using System.Diagnostics;
namespace PrefixBenchmark
{
    /// <summary>
    /// Micro-benchmark that times several ways of selecting the strings starting with
    /// the letter "a" from a large, randomly generated array of strings.
    /// </summary>
    public class Program
    {
        /// <summary>Denominator of the 1:N odds that a generated string starts with "a".</summary>
        public const int OddsOfA = 26;

        /// <summary>Number of strings generated for the benchmark.</summary>
        public const int SampleCount = 100000000;

        /// <summary>Maximum character length of each generated string.</summary>
        public const int MaximumLength = 5;

        /// <summary>
        /// Generates the sample data, then runs and times every selection strategy,
        /// writing one "name: elapsed" line per measurement to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            var stopwatch = new Stopwatch();

            // Data generation is timed on its own so it can be compared with the filters.
            stopwatch.Restart();
            string[] sequence = StartsWithA(SampleCount, new Random(), OddsOfA, MaximumLength);
            stopwatch.Stop();
            Console.WriteLine($"{nameof(StartsWithA)}: {stopwatch.Elapsed}");

            Time(nameof(GetMatchesIndexCount), stopwatch, sequence, GetMatchesIndexCount);
            Time(nameof(GetMatchesIndexListAdHoc), stopwatch, sequence, GetMatchesIndexListAdHoc);
            Time(nameof(GetMatchesIndexListPreAllocated), stopwatch, sequence, GetMatchesIndexListPreAllocated);
            Time(nameof(GetMatchesIndexListTruncated), stopwatch, sequence, GetMatchesIndexListTruncated);
            Time(nameof(GetMatchesForeachCount), stopwatch, sequence, GetMatchesForeachCount);
            Time(nameof(GetMatchesForeachListAdHoc), stopwatch, sequence, GetMatchesForeachListAdHoc);
            Time(nameof(GetMatchesForeachListPreAllocated), stopwatch, sequence, GetMatchesForeachListPreAllocated);
            Time(nameof(GetMatchesForeachListTruncated), stopwatch, sequence, GetMatchesForeachListTruncated);
            Time(nameof(GetMatchesLinqArray), stopwatch, sequence, GetMatchesLinqArray);
            Time(nameof(GetMatchesLinqList), stopwatch, sequence, GetMatchesLinqList);
        }

        /// <summary>
        /// Runs <paramref name="test"/> once against <paramref name="sequence"/> and writes
        /// the elapsed time to the console as "<paramref name="name"/>: elapsed".
        /// </summary>
        /// <typeparam name="T">The result type of the test.</typeparam>
        /// <param name="name">The label printed in front of the elapsed time.</param>
        /// <param name="stopwatch">The stopwatch to restart and stop around the test.</param>
        /// <param name="sequence">The strings passed to the test.</param>
        /// <param name="test">The selection strategy being measured.</param>
        /// <returns>The value returned by <paramref name="test"/>.</returns>
        private static T Time<T>(string name, Stopwatch stopwatch, string[] sequence,
            Func<string[], T> test)
        {
            stopwatch.Restart();
            T result = test(sequence);
            stopwatch.Stop();
            Console.WriteLine($"{name}: {stopwatch.Elapsed}");
            return result;
        }

        /// <summary>Counts the matches using an indexed <c>for</c> loop.</summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The number of strings starting with 'a'.</returns>
        private static int GetMatchesIndexCount(string[] sequence)
        {
            int length = sequence.Length;
            int count = 0;
            for (int index = 0; index < length; index++)
            {
                if (sequence[index].StartsWith('a'))
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Collects the matches with an indexed <c>for</c> loop into a list created
        /// without an initial capacity.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesIndexListAdHoc(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>();
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Collects the matches with an indexed <c>for</c> loop into a list whose capacity
        /// is set up front to the length of <paramref name="sequence"/>.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesIndexListPreAllocated(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>(length);
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Same as <see cref="GetMatchesIndexListPreAllocated"/>, but calls
        /// <see cref="List{T}.TrimExcess"/> on the list before returning it.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesIndexListTruncated(string[] sequence)
        {
            int length = sequence.Length;
            var list = new List<string>(length);
            for (int index = 0; index < length; index++)
            {
                string candidate = sequence[index];
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            list.TrimExcess();
            return list;
        }

        /// <summary>Counts the matches using a <c>foreach</c> loop.</summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The number of strings starting with 'a'.</returns>
        private static int GetMatchesForeachCount(string[] sequence)
        {
            int count = 0;
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Collects the matches with a <c>foreach</c> loop into a list created without
        /// an initial capacity.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesForeachListAdHoc(string[] sequence)
        {
            var list = new List<string>();
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Collects the matches with a <c>foreach</c> loop into a list whose capacity is
        /// set up front to the length of <paramref name="sequence"/>.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesForeachListPreAllocated(string[] sequence)
        {
            var list = new List<string>(sequence.Length);
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            return list;
        }

        /// <summary>
        /// Same as <see cref="GetMatchesForeachListPreAllocated"/>, but calls
        /// <see cref="List{T}.TrimExcess"/> on the list before returning it.
        /// </summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesForeachListTruncated(string[] sequence)
        {
            var list = new List<string>(sequence.Length);
            foreach (string candidate in sequence)
            {
                if (candidate.StartsWith('a'))
                {
                    list.Add(candidate);
                }
            }

            list.TrimExcess();
            return list;
        }

        /// <summary>Selects the matches with LINQ and materializes them as an array.</summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static string[] GetMatchesLinqArray(string[] sequence) =>
            sequence
                .Where(candidate => candidate.StartsWith('a'))
                .ToArray();

        /// <summary>Selects the matches with LINQ and materializes them as a list.</summary>
        /// <param name="sequence">The strings to examine.</param>
        /// <returns>The strings starting with 'a', in their original order.</returns>
        private static List<string> GetMatchesLinqList(string[] sequence) =>
            sequence
                .Where(candidate => candidate.StartsWith('a'))
                .ToList();

        /// <summary>
        /// Creates an array of strings, each with a 1:<paramref name="odds"/> likelihood of
        /// starting with the letter "a".
        /// </summary>
        /// <param name="sampleCount">The number of samples to generate.</param>
        /// <param name="random">The random number generator.</param>
        /// <param name="odds">The denominator of a 1:<paramref name="odds"/> likelihood of selection.</param>
        /// <param name="maximumLength">The maximum character length of each string.</param>
        /// <returns>A sequence of <paramref name="sampleCount"/> strings with the specified characteristics.</returns>
        protected static string[] StartsWithA(int sampleCount, Random random, int odds,
            int maximumLength)
        {
            string[] samples = new string[sampleCount];
            for (int index = 0; index < sampleCount; index++)
            {
                samples[index] = StartsWithA(random, odds, maximumLength);
            }

            return samples;
        }

        /// <summary>
        /// Creates a string with a 1:<paramref name="odds"/> likelihood of starting with the letter "a".
        /// The first character is 'a' or 'b'; every following character is 'b'.
        /// </summary>
        /// <param name="random">The random number generator.</param>
        /// <param name="odds">The denominator of a 1:<paramref name="odds"/> likelihood of selection.</param>
        /// <param name="maximumLength">The maximum character length of the string.</param>
        /// <returns>
        /// A string with the specified characteristics; for a <paramref name="maximumLength"/>
        /// of at least 1 its length is between 1 and <paramref name="maximumLength"/>, inclusive.
        /// </returns>
        protected static string StartsWithA(Random random, int odds, int maximumLength)
        {
            // Total length of the string, first character included.
            int length = random.Next(maximumLength) + 1;

            // Size the builder from the actual length instead of a second random draw.
            var builder = new StringBuilder(length);
            builder.Append(IsSelected(random, odds) ? 'a' : 'b');

            // The first character is already in place, so padding starts at index 1;
            // starting at 0 would make the string one character longer than requested.
            for (int index = 1; index < length; index++)
            {
                builder.Append('b');
            }

            return builder.ToString();
        }

        /// <summary>
        /// Evaluates a 1:<paramref name="odds"/> likelihood of selection.
        /// </summary>
        /// <param name="random">The random number generator.</param>
        /// <param name="odds">The denominator of a 1:<paramref name="odds"/> likelihood of selection.</param>
        /// <returns>True, if a 1:<paramref name="odds"/> event occurs; otherwise, false.</returns>
        protected static bool IsSelected(Random random, int odds) =>
            random.Next(odds) == 0;
    }
}