diff2html/src/diff-parser.ts

468 lines
15 KiB
TypeScript
Raw Normal View History

2019-12-29 22:31:32 +00:00
import { DiffFile, DiffBlock, DiffLine, LineType } from './types';
import { escapeForRegExp } from './utils';
2019-10-12 21:45:49 +00:00
/**
 * Options accepted by the diff parser. Every field is optional.
 */
export interface DiffParserConfig {
  /** Extra path prefix stripped from old-file (`---`) names, in addition to the built-in prefixes. */
  srcPrefix?: string;
  /** Extra path prefix stripped from new-file (`+++`) names, in addition to the built-in prefixes. */
  dstPrefix?: string;
  /** When a file's added + deleted line count exceeds this number, the file is marked as too big. */
  diffMaxChanges?: number;
  /** When any line of a file is longer than this number of characters, the file is marked as too big. */
  diffMaxLineLength?: number;
  /**
   * Builds the placeholder block header for a file marked as too big.
   * Receives the number of files already parsed (i.e. the index of the offending file).
   */
  diffTooBigMessage?: (fileIndex: number) => string;
}
/**
 * Returns the text after the last `.` in `filename`, used as the file's language.
 *
 * @param filename - File name or path to inspect.
 * @param language - Fallback returned when `filename` contains no `.` at all.
 * @returns The extension without the dot (possibly empty when the name ends with a dot),
 *          or `language` when there is no dot.
 */
function getExtension(filename: string, language: string): string {
  const lastDot = filename.lastIndexOf('.');
  return lastDot === -1 ? language : filename.slice(lastDot + 1);
}
/**
 * Tells whether `str` begins with at least one of the given prefixes.
 *
 * @param str - String to test.
 * @param prefixes - Candidate prefixes; an empty list never matches.
 * @returns `true` when any candidate is a prefix of `str`, otherwise `false`.
 */
function startsWithAny(str: string, prefixes: string[]): boolean {
  return prefixes.some(candidate => str.startsWith(candidate));
}
2019-12-29 22:31:32 +00:00
// Path prefixes that getFilename strips from the start of a parsed filename.
// NOTE(review): 'a/' and 'b/' look like git's default source/destination prefixes and the
// others like its mnemonic prefixes (diff.mnemonicPrefix) — confirm against the git docs.
const baseDiffFilenamePrefixes = ['a/', 'b/', 'i/', 'w/', 'c/', 'o/'];
2019-10-12 21:45:49 +00:00
/**
 * Extracts a clean filename from a diff header line or from a bare path.
 *
 * The line is matched against an optional literal marker followed by one space, surrounding
 * double quotes are dropped, the first matching known path prefix is removed and a trailing
 * unified-diff timestamp is cut off.
 *
 * @param line - Header line (e.g. `--- a/file.txt`) or bare path.
 * @param linePrefix - Literal marker expected at the start of `line` (e.g. `---`). When omitted
 *                     or empty, the whole line is treated as the path.
 * @param extraPrefix - Additional path prefix to strip; it is tried after the built-in ones.
 * @returns The filename, or an empty string when `line` does not have the expected shape.
 */
function getFilename(line: string, linePrefix?: string, extraPrefix?: string): string {
  const prefixes = extraPrefix !== undefined ? [...baseDiffFilenamePrefixes, extraPrefix] : baseDiffFilenamePrefixes;

  const filenamePattern = linePrefix ? new RegExp(`^${escapeForRegExp(linePrefix)} "?(.+?)"?$`) : /^"?(.+?)"?$/;

  const [, filename = ''] = filenamePattern.exec(line) || [];

  // An empty-string prefix "matches" everything but is falsy, so it leaves the name untouched.
  const matchingPrefix = prefixes.find(prefix => filename.startsWith(prefix));
  const fnameWithoutPrefix = matchingPrefix ? filename.slice(matchingPrefix.length) : filename;

  // Cleanup timestamps generated by the unified diff (diff command) as specified in
  // https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
  // Ie: 2016-10-25 11:37:14.000000000 +0200
  return fnameWithoutPrefix.replace(/\s+\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)? [+-]\d{4}.*$/, '');
}
/**
 * Reads the old-file name from a `--- ` header line.
 *
 * @param line - Header line starting with `--- `.
 * @param srcPrefix - Optional extra path prefix to strip from the name.
 * @returns The filename; an empty string when the line is not a `--- ` header.
 */
function getSrcFilename(line: string, srcPrefix?: string): string | undefined {
  return getFilename(line, '---', srcPrefix);
}
/**
 * Reads the new-file name from a `+++ ` header line.
 *
 * @param line - Header line starting with `+++ `.
 * @param dstPrefix - Optional extra path prefix to strip from the name.
 * @returns The filename; an empty string when the line is not a `+++ ` header.
 */
function getDstFilename(line: string, dstPrefix?: string): string | undefined {
  return getFilename(line, '+++', dstPrefix);
}
/**
 * Parses unified, git and git combined diff text into a list of files.
 *
 * The input is normalised to LF line endings, the "\ No newline at end of file" marker is
 * stripped, and the lines are walked once. Each line either starts a new file, starts a new
 * block (hunk), adds a line to the current block, or fills in file metadata (modes, renames,
 * copies, checksums, binary markers).
 *
 * Format references:
 * - Unified: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html
 * - Git Diff: https://git-scm.com/docs/git-diff-tree#_raw_output_format
 * - Git Combined Diff: https://git-scm.com/docs/git-diff-tree#_combined_diff_format
 *
 * @param diffInput - Raw diff text.
 * @param config - Optional prefixes and size limits.
 * @returns The parsed files in input order. A file that never receives a `newName` is dropped.
 * @throws Error when no current file exists at a point where one is required.
 */
export function parse(diffInput: string, config: DiffParserConfig = {}): DiffFile[] {
  const files: DiffFile[] = [];
  let currentFile: DiffFile | null = null;
  let currentBlock: DiffBlock | null = null;
  let oldLine: number | null = null;
  let oldLine2: number | null = null; // Used for combined diff
  let newLine: number | null = null;

  // Names taken from the `diff --git` line; only used when no better source sets them.
  let possibleOldName: string | null = null;
  let possibleNewName: string | null = null;

  /* Diff Header */
  const oldFileNameHeader = '--- ';
  const newFileNameHeader = '+++ ';
  const hunkHeaderPrefix = '@@';

  /* Diff */
  // diff --git a/blocked_delta_results.png b/blocked_delta_results.png
  const gitDiffStart = /^diff --git "?(.+)"? "?(.+)"?/;
  const oldMode = /^old mode (\d{6})/;
  const newMode = /^new mode (\d{6})/;
  // The two file-mode patterns below also match the combined-diff forms
  // (`new file mode <mode>` and `deleted file mode <mode>,<mode>`).
  const deletedFileMode = /^deleted file mode (\d{6})/;
  const newFileMode = /^new file mode (\d{6})/;
  const copyFrom = /^copy from "?(.+)"?/;
  const copyTo = /^copy to "?(.+)"?/;
  const renameFrom = /^rename from "?(.+)"?/;
  const renameTo = /^rename to "?(.+)"?/;
  const similarityIndex = /^similarity index (\d+)%/;
  const dissimilarityIndex = /^dissimilarity index (\d+)%/;
  const index = /^index ([\da-z]+)\.\.([\da-z]+)\s*(\d{6})?/;
  const binaryFiles = /^Binary files (.*) and (.*) differ/;
  const binaryDiff = /^GIT binary patch/;

  /* Combined Diff */
  const combinedIndex = /^index ([\da-z]+),([\da-z]+)\.\.([\da-z]+)/;
  const combinedMode = /^mode (\d{6}),(\d{6})\.\.(\d{6})/;

  const diffLines = diffInput
    .replace(/\\ No newline at end of file/g, '')
    .replace(/\r\n?/g, '\n')
    .split('\n');

  /* Add previous block (if it exists) before starting a new file */
  function saveBlock(): void {
    if (currentBlock !== null && currentFile !== null) {
      currentFile.blocks.push(currentBlock);
      currentBlock = null;
    }
  }

  /*
   * Add previous file (if it exists) before starting a new one,
   * but only if it has a name (to avoid binary files errors)
   */
  function saveFile(): void {
    if (currentFile !== null) {
      if (!currentFile.oldName && possibleOldName !== null) {
        currentFile.oldName = possibleOldName;
      }

      if (!currentFile.newName && possibleNewName !== null) {
        currentFile.newName = possibleNewName;
      }

      if (currentFile.newName) {
        files.push(currentFile);
        currentFile = null;
      }
    }

    possibleOldName = null;
    possibleNewName = null;
  }

  /* Create file structure */
  function startFile(): void {
    saveBlock();
    saveFile();

    // eslint-disable-next-line
    // @ts-ignore
    currentFile = {
      blocks: [],
      deletedLines: 0,
      addedLines: 0,
    };
  }

  function startBlock(line: string): void {
    saveBlock();

    let values;

    /**
     * From Range:
     * -<start line>[,<number of lines>]
     *
     * To Range:
     * +<start line>[,<number of lines>]
     *
     * @@ from-file-range to-file-range @@
     *
     * @@@ from-file-range from-file-range to-file-range @@@
     *
     * The number of lines is optional and is not used here; only the start lines are read.
     */
    if (currentFile !== null) {
      if ((values = /^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@.*/.exec(line))) {
        currentFile.isCombined = false;
        oldLine = parseInt(values[1], 10);
        newLine = parseInt(values[2], 10);
      } else if ((values = /^@@@ -(\d+)(?:,\d+)? -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@@.*/.exec(line))) {
        currentFile.isCombined = true;
        oldLine = parseInt(values[1], 10);
        oldLine2 = parseInt(values[2], 10);
        newLine = parseInt(values[3], 10);
      } else {
        // Synthetic headers ("Binary file", too-big message, ...) land here silently;
        // only something that looks like a hunk header but does not parse is reported.
        if (line.startsWith(hunkHeaderPrefix)) {
          console.error('Failed to parse lines, starting in 0!');
        }
        oldLine = 0;
        newLine = 0;
        currentFile.isCombined = false;
      }
    }

    /* Create block metadata */
    currentBlock = {
      lines: [],
      // eslint-disable-next-line
      // @ts-ignore
      oldStartLine: oldLine,
      // eslint-disable-next-line
      // @ts-ignore
      oldStartLine2: oldLine2,
      // eslint-disable-next-line
      // @ts-ignore
      newStartLine: newLine,
      header: line,
    };
  }

  /* Classify a content line, number it and append it to the current block */
  function createLine(line: string): void {
    if (currentFile === null || currentBlock === null || oldLine === null || newLine === null) return;

    // eslint-disable-next-line
    // @ts-ignore
    const currentLine: DiffLine = {
      content: line,
    };

    // Combined diffs carry one marker column per parent, hence the two-character prefixes.
    const addedPrefixes = currentFile.isCombined ? ['+ ', ' +', '++'] : ['+'];
    const deletedPrefixes = currentFile.isCombined ? ['- ', ' -', '--'] : ['-'];

    if (startsWithAny(line, addedPrefixes)) {
      currentFile.addedLines++;
      currentLine.type = LineType.INSERT;
      currentLine.oldNumber = undefined;
      currentLine.newNumber = newLine++;
    } else if (startsWithAny(line, deletedPrefixes)) {
      currentFile.deletedLines++;
      currentLine.type = LineType.DELETE;
      currentLine.oldNumber = oldLine++;
      currentLine.newNumber = undefined;
    } else {
      currentLine.type = LineType.CONTEXT;
      currentLine.oldNumber = oldLine++;
      currentLine.newNumber = newLine++;
    }
    currentBlock.lines.push(currentLine);
  }

  /*
   * Checks if there is a hunk header coming after the given line.
   *
   * Hunk header is a group of three lines started by ( `--- ` , `+++ ` , `@@` )
   */
  function existHunkHeader(line: string, lineIdx: number): boolean {
    // NOTE(review): this guard looks at the line the scan starts from, not at the line under
    // the cursor, so the scan is never stopped by the next file's `diff` line. It probably
    // was meant to test `diffLines[idx]`; behaviour is kept as-is because changing it alters
    // which files take their names from the rename/copy lines — confirm before fixing.
    if (line.startsWith('diff')) {
      return false;
    }

    for (let idx = lineIdx; idx < diffLines.length - 3; idx++) {
      if (
        diffLines[idx].startsWith(oldFileNameHeader) &&
        diffLines[idx + 1].startsWith(newFileNameHeader) &&
        diffLines[idx + 2].startsWith(hunkHeaderPrefix)
      ) {
        return true;
      }
    }

    return false;
  }

  diffLines.forEach((line, lineIndex) => {
    // Unmerged paths, and possibly other non-diffable files
    // https://github.com/scottgonzalez/pretty-diff/issues/11
    // Also, remove some useless lines
    if (!line || line.startsWith('*')) {
      return;
    }

    // Used to store regex capture groups
    let values;

    // Neighbouring lines default to '' at the input boundaries so the header
    // look-behind/look-ahead checks below never call a method on `undefined`.
    const prevLine = diffLines[lineIndex - 1] ?? '';
    const nxtLine = diffLines[lineIndex + 1] ?? '';
    const afterNxtLine = diffLines[lineIndex + 2] ?? '';

    if (line.startsWith('diff')) {
      startFile();

      if ((values = gitDiffStart.exec(line))) {
        // First path is the old (source) side, second the new (destination) side,
        // matching how the `Binary files` branch below pairs the prefixes.
        possibleOldName = getFilename(values[1], undefined, config.srcPrefix);
        possibleNewName = getFilename(values[2], undefined, config.dstPrefix);
      }

      if (currentFile === null) {
        throw new Error('Where is my file !!!');
      }

      currentFile.isGitDiff = true;
      return;
    }

    if (
      !currentFile || // If we do not have a file yet, we should create one
      (!currentFile.isGitDiff && // If we already have some non-git file in progress and
        line.startsWith(oldFileNameHeader) && // we get to an old file path header line
        // that is followed by the new file path header line and the hunk header line
        nxtLine.startsWith(newFileNameHeader) &&
        afterNxtLine.startsWith(hunkHeaderPrefix))
    ) {
      startFile();
    }

    // Ignore remaining diff for current file if marked as too big
    if (currentFile?.isTooBig) {
      return;
    }

    if (
      currentFile &&
      ((typeof config.diffMaxChanges === 'number' &&
        currentFile.addedLines + currentFile.deletedLines > config.diffMaxChanges) ||
        (typeof config.diffMaxLineLength === 'number' && line.length > config.diffMaxLineLength))
    ) {
      // Discard everything collected so far and leave a single placeholder block.
      currentFile.isTooBig = true;
      currentFile.addedLines = 0;
      currentFile.deletedLines = 0;
      currentFile.blocks = [];
      currentBlock = null;

      const message =
        typeof config.diffTooBigMessage === 'function'
          ? config.diffTooBigMessage(files.length)
          : 'Diff too big to be displayed';

      startBlock(message);
      return;
    }

    /*
     * We need to make sure that we have the three lines of the header.
     * This avoids cases like the ones described in:
     *   - https://github.com/rtfpessoa/diff2html/issues/87
     */
    if (
      (line.startsWith(oldFileNameHeader) && nxtLine.startsWith(newFileNameHeader)) ||
      (line.startsWith(newFileNameHeader) && prevLine.startsWith(oldFileNameHeader))
    ) {
      /*
       * --- Date Timestamp[FractionalSeconds] TimeZone
       * --- 2002-02-21 23:30:39.942229878 -0800
       */
      if (
        currentFile &&
        !currentFile.oldName &&
        line.startsWith('--- ') &&
        (values = getSrcFilename(line, config.srcPrefix))
      ) {
        currentFile.oldName = values;
        currentFile.language = getExtension(currentFile.oldName, currentFile.language);
        return;
      }

      /*
       * +++ Date Timestamp[FractionalSeconds] TimeZone
       * +++ 2002-02-21 23:30:39.942229878 -0800
       */
      if (
        currentFile &&
        !currentFile.newName &&
        line.startsWith('+++ ') &&
        (values = getDstFilename(line, config.dstPrefix))
      ) {
        currentFile.newName = values;
        currentFile.language = getExtension(currentFile.newName, currentFile.language);
        return;
      }
    }

    if (
      currentFile &&
      (line.startsWith(hunkHeaderPrefix) ||
        (currentFile.isGitDiff && currentFile.oldName && currentFile.newName && !currentBlock))
    ) {
      startBlock(line);
      return;
    }

    /*
     * There are three types of diff lines. These lines are defined by the way they start.
     * 1. New line     starts with: +
     * 2. Old line     starts with: -
     * 3. Context line starts with: <SPACE>
     */
    if (currentBlock && (line.startsWith('+') || line.startsWith('-') || line.startsWith(' '))) {
      createLine(line);
      return;
    }

    const doesNotExistHunkHeader = !existHunkHeader(line, lineIndex);

    if (currentFile === null) {
      throw new Error('Where is my file !!!');
    }

    /*
     * Git diffs provide more information regarding files modes, renames, copies,
     * commits between changes and similarity indexes
     */
    if ((values = oldMode.exec(line))) {
      currentFile.oldMode = values[1];
    } else if ((values = newMode.exec(line))) {
      currentFile.newMode = values[1];
    } else if ((values = deletedFileMode.exec(line))) {
      currentFile.deletedFileMode = values[1];
      currentFile.isDeleted = true;
    } else if ((values = newFileMode.exec(line))) {
      currentFile.newFileMode = values[1];
      currentFile.isNew = true;
    } else if ((values = copyFrom.exec(line))) {
      // Names from copy/rename lines are only used when no `---`/`+++` header will supply them.
      if (doesNotExistHunkHeader) {
        currentFile.oldName = values[1];
      }
      currentFile.isCopy = true;
    } else if ((values = copyTo.exec(line))) {
      if (doesNotExistHunkHeader) {
        currentFile.newName = values[1];
      }
      currentFile.isCopy = true;
    } else if ((values = renameFrom.exec(line))) {
      if (doesNotExistHunkHeader) {
        currentFile.oldName = values[1];
      }
      currentFile.isRename = true;
    } else if ((values = renameTo.exec(line))) {
      if (doesNotExistHunkHeader) {
        currentFile.newName = values[1];
      }
      currentFile.isRename = true;
    } else if ((values = binaryFiles.exec(line))) {
      currentFile.isBinary = true;
      currentFile.oldName = getFilename(values[1], undefined, config.srcPrefix);
      currentFile.newName = getFilename(values[2], undefined, config.dstPrefix);
      startBlock('Binary file');
    } else if (binaryDiff.test(line)) {
      currentFile.isBinary = true;
      startBlock(line);
    } else if ((values = similarityIndex.exec(line))) {
      currentFile.unchangedPercentage = parseInt(values[1], 10);
    } else if ((values = dissimilarityIndex.exec(line))) {
      currentFile.changedPercentage = parseInt(values[1], 10);
    } else if ((values = index.exec(line))) {
      currentFile.checksumBefore = values[1];
      currentFile.checksumAfter = values[2];
      if (values[3]) {
        currentFile.mode = values[3];
      }
    } else if ((values = combinedIndex.exec(line))) {
      // index <parent1>,<parent2>..<result>
      currentFile.checksumBefore = [values[1], values[2]];
      currentFile.checksumAfter = values[3];
    } else if ((values = combinedMode.exec(line))) {
      // mode <parent1>,<parent2>..<result>
      currentFile.oldMode = [values[1], values[2]];
      currentFile.newMode = values[3];
    }
  });

  saveBlock();
  saveFile();

  return files;
}