// blob: 21452f5fc2387482e7e1ef19e730863982eef314 [file] [log] [blame]
//
// Copyright (c) 2010-2023 Antmicro
//
// This file is licensed under the MIT License.
// Full license text is available in 'licenses/MIT.txt'.
//
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Antmicro.Renode.PlatformDescription
{
public static class PreLexer
{
/// <summary>
/// Translates an indentation-structured source text into a stream of lines in which the
/// structure is expressed with explicit braces and semicolons.
/// </summary>
/// <remarks>
/// The text is split into lines, multiline strings are joined and comments are stripped. Every
/// non-blank line is then compared with the next non-blank one: a deeper indent appends '{'
/// characters, a shallower indent appends '}' characters followed by ';', and an equal indent
/// appends ';'. While the input is inside explicit braces, lines are passed through unchanged.
/// The last non-blank line is closed down to indent level 0 without a trailing semicolon.
/// Blank lines that precede the first non-blank line are passed through as they are; later blank
/// lines are emitted as empty strings after the non-blank line that precedes them.
/// The sequence is produced lazily, so errors surface while it is being enumerated.
/// </remarks>
/// <param name="source">Complete source text; '\r' characters are dropped before splitting on '\n'.</param>
/// <param name="path">Path shown in error messages; an empty string omits it.</param>
/// <returns>The decorated lines, in input order.</returns>
/// <exception cref="ParsingException">
/// Thrown when the first non-blank line is indented, when an indent is not a multiple of the
/// indent width, when a string, comment or multiline string is malformed, or when the produced
/// braces are unbalanced although the input braces were balanced.
/// </exception>
public static IEnumerable<string> Process(string source, string path = "")
{
    // We remove '\r' so that we don't have to worry about line endings.
    var sourceInLines = source.Replace("\r", string.Empty).Split(new[] { '\n' }, StringSplitOptions.None);
    var lineSource = HandleMultilineStrings(sourceInLines, path);

    var inputBraceLevel = 0;
    var outputBraceLevel = 0;

    // Zero-based index of the element of the comment-stripped line stream that is currently
    // being inspected; used only for error reporting.
    var lineNo = -1;

    var enumerator = HandleComments(lineSource, path).GetEnumerator();
    var started = false; // will change to true if the file is not empty (or has only empty lines)
    while(enumerator.MoveNext())
    {
        lineNo++;
        if(!string.IsNullOrWhiteSpace(enumerator.Current))
        {
            started = true;
            break;
        }
        yield return enumerator.Current;
    }
    if(!started)
    {
        yield break;
    }

    var oldLine = enumerator.Current;
    var oldIndentLevel = GetIndentLevel(oldLine, lineNo, path);
    if(oldIndentLevel != 0)
    {
        throw GetException(ParsingError.WrongIndent, lineNo, 0, oldLine, "First line with text cannot be indented.", path);
    }

    if(!enumerator.MoveNext())
    {
        // The file has exactly one line with text: nothing to decorate.
        yield return oldLine;
        AccountBraceLevel(oldLine, ref outputBraceLevel);
        yield break;
    }

    var numberOfEmptyLines = 0;
    do
    {
        lineNo++;
        AccountBraceLevel(oldLine, ref inputBraceLevel);
        var newLine = enumerator.Current;

        // pass through all empty lines
        while(string.IsNullOrWhiteSpace(newLine))
        {
            numberOfEmptyLines++;
            if(!enumerator.MoveNext())
            {
                goto finish;
            }
            // Keep the reported line number in step with the enumerator; without this every
            // skipped blank line would make later indent errors point at a line that is too early.
            lineNo++;
            newLine = enumerator.Current;
        }

        if(inputBraceLevel > 0)
        {
            // Inside explicit braces the indentation carries no meaning, so the line is not decorated.
            AccountBraceLevel(oldLine, ref outputBraceLevel);
            yield return oldLine;
        }
        else
        {
            var newIndentLevel = GetIndentLevel(newLine, lineNo, path);
            var result = DecorateLineIfNecessary(oldLine, oldIndentLevel, newIndentLevel, false);
            yield return result;
            AccountBraceLevel(result, ref outputBraceLevel);
            oldIndentLevel = newIndentLevel;
        }

        // The skipped blank lines are emitted after the line they followed.
        for(var i = 0; i < numberOfEmptyLines; i++)
        {
            yield return "";
        }
        numberOfEmptyLines = 0;

        oldLine = newLine;
    }
    while(enumerator.MoveNext());

finish:
    // oldLine is now the last line with text: close every indentation level that is still open.
    AccountBraceLevel(oldLine, ref inputBraceLevel);
    oldLine = DecorateLineIfNecessary(oldLine, oldIndentLevel, 0, true);
    yield return oldLine;
    AccountBraceLevel(oldLine, ref outputBraceLevel);

    if(inputBraceLevel == 0 && outputBraceLevel != 0)
    {
        // we only check output brace level if input was balanced, otherwise it does not make sense
        throw new ParsingException(ParsingError.InternalPrelexerError, "Internal prelexer error, unbalanced output with balanced input.");
    }

    // Trailing blank lines of the input.
    for(var i = 0; i < numberOfEmptyLines; i++)
    {
        yield return "";
    }
}
/// <summary>
/// Removes "//" and "/* ... */" comments from a stream of lines, producing exactly one output
/// line for every input line.
/// </summary>
/// <remarks>
/// A comment that runs to the end of the line is cut off; a comment closed within the line is
/// overwritten with spaces so that the remaining text keeps its column. A line that lies entirely
/// inside an unclosed multiline comment is replaced by an empty string. Comment markers inside
/// double-quoted strings are skipped. The brace level of the already produced lines is tracked,
/// because the placement restrictions on multiline comments apply only at brace level 0.
/// </remarks>
/// <param name="lineSource">Lines to process.</param>
/// <param name="path">Path passed on to the error messages.</param>
/// <returns>The lines with comments removed, produced lazily.</returns>
/// <exception cref="ParsingException">
/// Thrown for an unterminated double-quoted string and for a multiline comment placed where it is
/// not allowed at brace level 0.
/// </exception>
private static IEnumerable<string> HandleComments(IEnumerable<string> lineSource, string path)
{
var inMultilineComment = false;
// brace level of everything yielded so far; 0 means the indentation is significant
var localBraceLevel = 0;
var lineNo = -1;
foreach(var originalLine in lineSource)
{
lineNo++;
var line = originalLine;
var currentIndex = 0;
if(inMultilineComment)
{
// a comment opened on an earlier line is still active: look for its end on this line
var closingIndex = line.IndexOf("*/", StringComparison.InvariantCulture);
if(closingIndex == -1)
{
yield return string.Empty; // no need to adjust brace level
continue;
}
// at brace level 0 nothing but whitespace may follow the closing "*/"
if(localBraceLevel == 0 && closingIndex != line.TrimEnd().Length - 2)
{
throw GetException(ParsingError.SyntaxError, lineNo, closingIndex, originalLine, "Multiline comment in indent mode can only finish at the end of the line.", path);
}
// blank out everything up to and including the closing "*/"
var newLine = new StringBuilder(line);
for(var i = 0; i <= closingIndex + 1; i++)
{
newLine[i] = ' ';
}
line = newLine.ToString();
inMultilineComment = false;
}
// scan the rest of the line; every path out of this loop yields the line once or throws
while(true)
{
switch(FindInLine(line, ref currentIndex))
{
case FindResult.Nothing:
AccountBraceLevel(line, ref localBraceLevel);
yield return line;
goto next;
case FindResult.SingleLineCommentStart:
// currentIndex points at the second '/', so the cut happens right before the first one
line = line.Substring(0, currentIndex - 1);
AccountBraceLevel(line, ref localBraceLevel);
yield return line;
goto next;
case FindResult.StringStart:
// step over the opening quote, then look for the first closing quote that is not escaped
currentIndex++;
while(true)
{
currentIndex = line.IndexOf('"', currentIndex) + 1;
if(currentIndex == 0) // means that IndexOf returned -1
{
throw GetException(ParsingError.SyntaxError, lineNo, originalLine.Length - 1, originalLine, "Unterminated string.", path);
}
// if this is escaped quote, just ignore it
if(!IsEscapedPosition(line, currentIndex - 1))
{
break;
}
}
break;
case FindResult.MultilineCommentStart:
// currentIndex points at the '*' of "/*"; the search starts after it, so "/*/" does not close the comment
var nextIndex = line.IndexOf("*/", currentIndex + 1, StringComparison.InvariantCulture) + 2;
if(nextIndex == 1) // means that IndexOf returned -1
{
// the comment continues on the following lines: keep only the text before "/*"
inMultilineComment = true;
line = line.Substring(0, currentIndex - 1);
AccountBraceLevel(line, ref localBraceLevel);
yield return line;
goto next;
}
if(localBraceLevel == 0)
{
if(line.Length - line.TrimStart().Length + 1 == currentIndex && // comment is the first meaningful thing in line
line.TrimEnd().Length != nextIndex) // but not the last one
{
throw GetException(ParsingError.SyntaxError, lineNo, currentIndex, originalLine,
"Single line multiline comment in indent mode cannot be the first non-whitespace element of a line if it is does not span to the end of the line.",
path);
}
}
// overwrite the whole "/* ... */" with spaces and continue scanning right after it
var newLine = new StringBuilder(line);
for(var i = currentIndex - 1; i < nextIndex; i++)
{
newLine[i] = ' ';
}
line = newLine.ToString();
currentIndex = nextIndex;
break;
}
}
next:;
}
}
/// <summary>
/// Scans <paramref name="line"/> from <paramref name="currentIndex"/> for the next string or
/// comment opening.
/// </summary>
/// <param name="line">Line to scan.</param>
/// <param name="currentIndex">
/// Position to start from. On return it points at the opening quote for
/// <see cref="FindResult.StringStart"/>, at the second character of the marker ('*' or the second
/// '/') for the comment results, and at the end of the line for <see cref="FindResult.Nothing"/>.
/// </param>
/// <returns>The kind of element found.</returns>
/// <remarks>
/// "//" starts a comment only at the very beginning of the line or directly after a space.
/// A "//" that does not start a comment is consumed as a whole. A single '/' followed by any
/// other character consumes only the slash, so that a quote placed right after a slash is still
/// recognised as the start of a string.
/// </remarks>
private static FindResult FindInLine(string line, ref int currentIndex)
{
    for(; currentIndex < line.Length; currentIndex++)
    {
        var current = line[currentIndex];
        if(current == '"')
        {
            return FindResult.StringStart;
        }
        if(current != '/' || currentIndex + 1 >= line.Length)
        {
            continue;
        }

        var nextChar = line[currentIndex + 1];
        if(nextChar == '*')
        {
            currentIndex++;
            return FindResult.MultilineCommentStart;
        }
        if(nextChar == '/')
        {
            // Step onto the second slash; when this is not a comment the loop increment moves past it.
            currentIndex++;
            if(currentIndex == 1 || line[currentIndex - 2] == ' ')
            {
                return FindResult.SingleLineCommentStart;
            }
        }
        // Any other character is left for the next iteration to inspect.
    }
    return FindResult.Nothing;
}
/// <summary>
/// Joins the lines of every multiline string (delimited by ''') into a single element.
/// </summary>
/// <remarks>
/// A line containing exactly one unescaped delimiter opens a multiline string; the string is
/// closed by the next line containing at least one unescaped delimiter. The collected lines are
/// joined with '\n' and yielded as one element. All other lines are yielded unchanged.
/// </remarks>
/// <param name="sourceLine">Lines to process.</param>
/// <param name="path">Path passed on to the error message.</param>
/// <returns>The lines with multiline strings merged, produced lazily.</returns>
/// <exception cref="ParsingException">Thrown when an opened multiline string is never closed.</exception>
public static IEnumerable<string> HandleMultilineStrings(IEnumerable<string> sourceLine, string path)
{
    const string quoteDelimiter = "'''";
    var delimiterRegex = new Regex(Regex.Escape(quoteDelimiter), RegexOptions.None);
    var pendingLines = new List<string>();
    var insideString = false;

    // Where the currently open string started; used only for the error message.
    var openingLineNo = -1;
    var openingLineText = "";
    var openingColumn = -1;

    var lineIndex = -1;
    foreach(var line in sourceLine)
    {
        lineIndex++;
        int lastDelimiterIndex = -1;
        var delimiterCount = CountUnescapedCharacters(line, delimiterRegex, out lastDelimiterIndex);

        if(!insideString)
        {
            if(delimiterCount == 1)
            {
                // a lone delimiter: the string continues on the following lines
                openingLineNo = lineIndex;
                openingLineText = line;
                openingColumn = lastDelimiterIndex;
                insideString = true;
                pendingLines.Add(line);
            }
            else
            {
                // we have opening and closing quote (or more) or no quotes at all
                yield return line;
            }
            continue;
        }

        pendingLines.Add(line);
        if(delimiterCount < 1)
        {
            continue;
        }

        insideString = false;
        var joined = string.Join("\n", pendingLines);
        pendingLines.Clear();
        yield return joined;
    }

    if(insideString) // closing quote was never found
    {
        throw GetException(ParsingError.SyntaxError, openingLineNo, openingColumn, openingLineText,
            "Unclosed multiline string", path);
    }
}
/// <summary>
/// Counts the matches of <paramref name="regexCharacterToFind"/> in <paramref name="line"/> that
/// are not escaped.
/// </summary>
/// <remarks>
/// A match is escaped when it is directly preceded by an odd number of
/// <paramref name="escapeCharacter"/> characters.
/// </remarks>
/// <param name="line">Text to search.</param>
/// <param name="regexCharacterToFind">Pattern of the element to count.</param>
/// <param name="lastQuoteIndex">
/// Receives the index in <paramref name="line"/> at which the last unescaped match starts, or -1
/// when there is none.
/// </param>
/// <param name="escapeCharacter">Character that escapes a match when placed in front of it.</param>
/// <returns>The number of unescaped matches.</returns>
public static int CountUnescapedCharacters(string line, Regex regexCharacterToFind, out int lastQuoteIndex, char escapeCharacter = '\\')
{
    var unescapedCount = 0;
    lastQuoteIndex = -1;
    foreach(Match match in regexCharacterToFind.Matches(line))
    {
        // Count the unbroken run of escape characters that ends right before the match.
        var precedingEscapes = 0;
        for(var i = match.Index - 1; i >= 0 && line[i] == escapeCharacter; i--)
        {
            precedingEscapes++;
        }

        if(precedingEscapes % 2 == 0)
        {
            unescapedCount++;
            // Matches come in ascending order, so the last assignment wins. The match's own
            // index is stored (not the one before it) so that a match at column 0 cannot be
            // confused with the "not found" value.
            lastQuoteIndex = match.Index;
        }
    }
    return unescapedCount;
}
/// <summary>
/// Appends to <paramref name="line"/> the braces and the semicolon implied by the change of
/// indentation between this line and the one that follows it.
/// </summary>
/// <param name="line">Line to decorate.</param>
/// <param name="oldIndentLevel">Indent level of <paramref name="line"/>.</param>
/// <param name="newIndentLevel">Indent level of the line that follows.</param>
/// <param name="doNotInsertSemicolon">When true, no ';' is appended.</param>
/// <returns>
/// The line followed by one '{' per level gained, or by one '}' per level lost and an optional
/// ';', or - when the level is unchanged - by an optional ';' unless the line is blank.
/// </returns>
private static string DecorateLineIfNecessary(string line, int oldIndentLevel, int newIndentLevel, bool doNotInsertSemicolon)
{
    if(newIndentLevel > oldIndentLevel)
    {
        // going deeper: open one block per level, never followed by a semicolon
        return line + new string('{', newIndentLevel - oldIndentLevel);
    }

    var terminator = doNotInsertSemicolon ? string.Empty : ";";
    if(newIndentLevel < oldIndentLevel)
    {
        // going back: close one block per level
        return line + new string('}', oldIndentLevel - newIndentLevel) + terminator;
    }

    // same level: blank lines stay untouched
    return string.IsNullOrWhiteSpace(line) ? line : line + terminator;
}
/// <summary>
/// Builds a <see cref="ParsingException"/> whose message contains the error code, the location,
/// the offending line and a caret placed under the reported column.
/// </summary>
/// <param name="error">Kind of the error; its numeric value is printed as a two-digit code.</param>
/// <param name="lineNo">Zero-based line number; printed one-based.</param>
/// <param name="columnNo">Zero-based column; printed one-based. Negative values are treated as 0.</param>
/// <param name="line">Text of the offending line.</param>
/// <param name="message">Description of the problem.</param>
/// <param name="path">Path of the file; omitted from the message when null or empty.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
private static ParsingException GetException(ParsingError error, int lineNo, int columnNo, string line, string message, string path)
{
    // A negative column would make the caret padding below throw ArgumentOutOfRangeException
    // and hide the parsing error that is being reported.
    var column = Math.Max(columnNo, 0);
    var location = string.IsNullOrEmpty(path) ? "" : path + ':';

    var fullMessage = string.Format("Error E{0:D2}: ", (int)error) + message + Environment.NewLine +
        string.Format("At {0}{1}:{2}:", location, lineNo + 1, column + 1) + Environment.NewLine +
        line + Environment.NewLine + new string(' ', column) + "^";

    // Returned rather than thrown: every caller writes "throw GetException(...)".
    return new ParsingException(error, fullMessage);
}
/// <summary>
/// Computes the indent level of <paramref name="line"/> from the number of its leading spaces.
/// </summary>
/// <param name="line">Line to measure.</param>
/// <param name="lineNo">Zero-based line number, used in the error message.</param>
/// <param name="path">Path passed on to the error message.</param>
/// <returns>The number of leading spaces divided by the indent width.</returns>
/// <exception cref="ParsingException">
/// Thrown when the number of leading spaces is not a multiple of the indent width.
/// </exception>
private static int GetIndentLevel(string line, int lineNo, string path)
{
    var leadingSpaces = 0;
    while(leadingSpaces < line.Length && line[leadingSpaces] == ' ')
    {
        leadingSpaces++;
    }

    if(leadingSpaces % SpacesPerIndent == 0)
    {
        return leadingSpaces / SpacesPerIndent;
    }

    // the caret is placed under the last space of the indent
    var description = string.Format("Indent's length has to be multiple of {0}, this one is {1} spaces long.", SpacesPerIndent, leadingSpaces);
    throw GetException(ParsingError.WrongIndent, lineNo, leadingSpaces - 1, line, description, path);
}
/// <summary>
/// Adjusts <paramref name="braceLevel"/> by the braces found in <paramref name="line"/>:
/// +1 for every '{' and -1 for every '}' that is not inside a double-quoted string.
/// </summary>
/// <param name="line">Line to inspect; comments are expected to be removed already.</param>
/// <param name="braceLevel">Running brace level, updated in place.</param>
private static void AccountBraceLevel(string line, ref int braceLevel)
{
    var insideString = false;
    var position = 0;
    foreach(var character in line)
    {
        switch(character)
        {
            case '{':
                if(!insideString)
                {
                    braceLevel++;
                }
                break;
            case '}':
                if(!insideString)
                {
                    braceLevel--;
                }
                break;
            case '"':
                // only a quote that is not escaped opens or closes a string
                if(!IsEscapedPosition(line, position))
                {
                    insideString = !insideString;
                }
                break;
        }
        position++;
    }
}
/// <summary>
/// Tells whether the character at <paramref name="position"/> is escaped, i.e. directly preceded
/// by an odd number of backslashes.
/// </summary>
/// <param name="str">Text containing the character.</param>
/// <param name="position">Index of the character in <paramref name="str"/>.</param>
/// <returns>True when the run of backslashes ending right before the position has odd length.</returns>
private static bool IsEscapedPosition(string str, int position)
{
    var backslashes = 0;
    for(var i = position - 1; i >= 0 && str[i] == '\\'; i--)
    {
        backslashes++;
    }
    // an even run consists of escaped backslashes only and leaves the position unescaped
    return (backslashes & 1) != 0;
}
// Number of spaces that make up one indent level; GetIndentLevel rejects indents that are not a multiple of it.
private const int SpacesPerIndent = 4;
// Outcome of a FindInLine scan: the kind of element found at the returned position.
private enum FindResult
{
// the end of the line was reached without finding a string or a comment
Nothing,
// a double quote was found
StringStart,
// "/*" was found
MultilineCommentStart,
// "//" was found at the beginning of the line or directly after a space
SingleLineCommentStart
}
}
}