diff2html/src/diff-parser.ts

434 lines
14 KiB
TypeScript
Raw Normal View History

2019-10-21 22:37:42 +00:00
import { DiffFile, DiffBlock, DiffLine, LineType } from "./types";
2019-10-12 21:45:49 +00:00
import { escapeForRegExp } from "./utils";
export interface DiffParserConfig {
srcPrefix?: string;
dstPrefix?: string;
}
function getExtension(filename: string, language: string): string {
const filenameParts = filename.split(".");
return filenameParts.length > 1 ? filenameParts[filenameParts.length - 1] : language;
}
function startsWithAny(str: string, prefixes: string[]): boolean {
return prefixes.reduce<boolean>((startsWith, prefix) => startsWith || str.startsWith(prefix), false);
}
const baseDiffFilenamePrefixes = ["a/", "b/", "i/", "w/", "c/", "o/"];
function getFilename(line: string, linePrefix?: string, extraPrefix?: string): string {
const prefixes = extraPrefix !== undefined ? [...baseDiffFilenamePrefixes, extraPrefix] : baseDiffFilenamePrefixes;
const FilenameRegExp = linePrefix
? new RegExp(`^${escapeForRegExp(linePrefix)} "?(.+?)"?$`)
: new RegExp('^"?(.+?)"?$');
2019-12-22 18:47:20 +00:00
const [, filename = ""] = FilenameRegExp.exec(line) || [];
2019-10-12 21:45:49 +00:00
const matchingPrefix = prefixes.find(p => filename.indexOf(p) === 0);
const fnameWithoutPrefix = matchingPrefix ? filename.slice(matchingPrefix.length) : filename;
// Cleanup timestamps generated by the unified diff (diff command) as specified in
// https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
// Ie: 2016-10-25 11:37:14.000000000 +0200
return fnameWithoutPrefix.replace(/\s+\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)? [-+]\d{4}.*$/, "");
}
function getSrcFilename(line: string, srcPrefix?: string): string | undefined {
return getFilename(line, "---", srcPrefix);
}
function getDstFilename(line: string, dstPrefix?: string): string | undefined {
return getFilename(line, "+++", dstPrefix);
}
/**
*
* Docs:
* - Unified: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html
* - Git Diff: https://git-scm.com/docs/git-diff-tree#_raw_output_format
* - Git Combined Diff: https://git-scm.com/docs/git-diff-tree#_combined_diff_format
*
*/
export function parse(diffInput: string, config: DiffParserConfig = {}): DiffFile[] {
const files: DiffFile[] = [];
let currentFile: DiffFile | null = null;
let currentBlock: DiffBlock | null = null;
let oldLine: number | null = null;
let oldLine2: number | null = null; // Used for combined diff
let newLine: number | null = null;
let possibleOldName: string | null = null;
let possibleNewName: string | null = null;
/* Diff Header */
const oldFileNameHeader = "--- ";
const newFileNameHeader = "+++ ";
const hunkHeaderPrefix = "@@";
/* Diff */
const oldMode = /^old mode (\d{6})/;
const newMode = /^new mode (\d{6})/;
const deletedFileMode = /^deleted file mode (\d{6})/;
const newFileMode = /^new file mode (\d{6})/;
const copyFrom = /^copy from "?(.+)"?/;
const copyTo = /^copy to "?(.+)"?/;
const renameFrom = /^rename from "?(.+)"?/;
const renameTo = /^rename to "?(.+)"?/;
const similarityIndex = /^similarity index (\d+)%/;
const dissimilarityIndex = /^dissimilarity index (\d+)%/;
const index = /^index ([0-9a-z]+)\.\.([0-9a-z]+)\s*(\d{6})?/;
const binaryFiles = /^Binary files (.*) and (.*) differ/;
const binaryDiff = /^GIT binary patch/;
/* Combined Diff */
const combinedIndex = /^index ([0-9a-z]+),([0-9a-z]+)\.\.([0-9a-z]+)/;
const combinedMode = /^mode (\d{6}),(\d{6})\.\.(\d{6})/;
const combinedNewFile = /^new file mode (\d{6})/;
const combinedDeletedFile = /^deleted file mode (\d{6}),(\d{6})/;
const diffLines = diffInput
.replace(/\\ No newline at end of file/g, "")
.replace(/\r\n?/g, "\n")
.split("\n");
/* Add previous block(if exists) before start a new file */
function saveBlock(): void {
if (currentBlock !== null && currentFile !== null) {
currentFile.blocks.push(currentBlock);
currentBlock = null;
}
}
/*
* Add previous file(if exists) before start a new one
* if it has name (to avoid binary files errors)
*/
function saveFile(): void {
if (currentFile !== null) {
if (!currentFile.oldName && possibleOldName !== null) {
currentFile.oldName = possibleOldName;
}
if (!currentFile.newName && possibleNewName !== null) {
currentFile.newName = possibleNewName;
}
if (currentFile.newName) {
files.push(currentFile);
currentFile = null;
}
}
possibleOldName = null;
possibleNewName = null;
}
/* Create file structure */
function startFile(): void {
saveBlock();
saveFile();
// eslint-disable-next-line
// @ts-ignore
currentFile = {
blocks: [],
deletedLines: 0,
addedLines: 0
};
}
function startBlock(line: string): void {
saveBlock();
let values;
/**
* From Range:
* -<start line>[,<number of lines>]
*
* To Range:
* +<start line>[,<number of lines>]
*
* @@ from-file-range to-file-range @@
*
* @@@ from-file-range from-file-range to-file-range @@@
*
* number of lines is optional, if omited consider 0
*/
if (currentFile !== null) {
if ((values = /^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@.*/.exec(line))) {
currentFile.isCombined = false;
oldLine = parseInt(values[1], 10);
newLine = parseInt(values[2], 10);
} else if ((values = /^@@@ -(\d+)(?:,\d+)? -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@@.*/.exec(line))) {
currentFile.isCombined = true;
oldLine = parseInt(values[1], 10);
oldLine2 = parseInt(values[2], 10);
newLine = parseInt(values[3], 10);
} else {
if (line.startsWith(hunkHeaderPrefix)) {
console.error("Failed to parse lines, starting in 0!");
}
oldLine = 0;
newLine = 0;
currentFile.isCombined = false;
}
}
/* Create block metadata */
// eslint-disable-next-line
// @ts-ignore
currentBlock = {
lines: [],
oldStartLine: oldLine,
oldStartLine2: oldLine2,
newStartLine: newLine,
header: line
};
}
function createLine(line: string): void {
if (currentFile === null || currentBlock === null || oldLine === null || newLine === null) return;
// eslint-disable-next-line
// @ts-ignore
const currentLine: DiffLine = {
content: line
};
const addedPrefixes = currentFile.isCombined ? ["+ ", " +", "++"] : ["+"];
const deletedPrefixes = currentFile.isCombined ? ["- ", " -", "--"] : ["-"];
if (startsWithAny(line, addedPrefixes)) {
currentFile.addedLines++;
currentLine.type = LineType.INSERT;
currentLine.oldNumber = undefined;
currentLine.newNumber = newLine++;
} else if (startsWithAny(line, deletedPrefixes)) {
currentFile.deletedLines++;
currentLine.type = LineType.DELETE;
currentLine.oldNumber = oldLine++;
currentLine.newNumber = undefined;
} else {
currentLine.type = LineType.CONTEXT;
currentLine.oldNumber = oldLine++;
currentLine.newNumber = newLine++;
}
currentBlock.lines.push(currentLine);
}
/*
* Checks if there is a hunk header coming before a new file starts
*
* Hunk header is a group of three lines started by ( `--- ` , `+++ ` , `@@` )
*/
function existHunkHeader(line: string, lineIdx: number): boolean {
let idx = lineIdx;
while (idx < diffLines.length - 3) {
if (line.startsWith("diff")) {
return false;
}
if (
diffLines[idx].startsWith(oldFileNameHeader) &&
diffLines[idx + 1].startsWith(newFileNameHeader) &&
diffLines[idx + 2].startsWith(hunkHeaderPrefix)
) {
return true;
}
idx++;
}
return false;
}
2019-10-13 18:21:19 +00:00
diffLines.forEach((line, lineIndex) => {
2019-10-12 21:45:49 +00:00
// Unmerged paths, and possibly other non-diffable files
// https://github.com/scottgonzalez/pretty-diff/issues/11
// Also, remove some useless lines
if (!line || line.startsWith("*")) {
return;
}
// Used to store regex capture groups
let values;
const prevLine = diffLines[lineIndex - 1];
const nxtLine = diffLines[lineIndex + 1];
const afterNxtLine = diffLines[lineIndex + 2];
if (line.startsWith("diff")) {
startFile();
// diff --git a/blocked_delta_results.png b/blocked_delta_results.png
const gitDiffStart = /^diff --git "?(.+)"? "?(.+)"?/;
if ((values = gitDiffStart.exec(line))) {
possibleOldName = getFilename(values[1], undefined, config.dstPrefix);
possibleNewName = getFilename(values[2], undefined, config.srcPrefix);
}
if (currentFile === null) {
throw new Error("Where is my file !!!");
}
currentFile.isGitDiff = true;
return;
}
if (
!currentFile || // If we do not have a file yet, we should crete one
(!currentFile.isGitDiff &&
currentFile && // If we already have some file in progress and
line.startsWith(oldFileNameHeader) && // If we get to an old file path header line
// And is followed by the new file path header line and the hunk header line
nxtLine.startsWith(newFileNameHeader) &&
afterNxtLine.startsWith(hunkHeaderPrefix))
2019-10-12 21:45:49 +00:00
) {
startFile();
}
/*
* We need to make sure that we have the three lines of the header.
* This avoids cases like the ones described in:
* - https://github.com/rtfpessoa/diff2html/issues/87
*/
if (
(line.startsWith(oldFileNameHeader) && nxtLine.startsWith(newFileNameHeader)) ||
(line.startsWith(newFileNameHeader) && prevLine.startsWith(oldFileNameHeader))
) {
/*
* --- Date Timestamp[FractionalSeconds] TimeZone
* --- 2002-02-21 23:30:39.942229878 -0800
*/
if (
currentFile &&
!currentFile.oldName &&
line.startsWith("--- ") &&
(values = getSrcFilename(line, config.srcPrefix))
) {
currentFile.oldName = values;
currentFile.language = getExtension(currentFile.oldName, currentFile.language);
return;
}
/*
* +++ Date Timestamp[FractionalSeconds] TimeZone
* +++ 2002-02-21 23:30:39.942229878 -0800
*/
if (
currentFile &&
!currentFile.newName &&
line.startsWith("+++ ") &&
(values = getDstFilename(line, config.dstPrefix))
) {
currentFile.newName = values;
currentFile.language = getExtension(currentFile.newName, currentFile.language);
return;
}
}
if (
2019-10-13 18:21:19 +00:00
currentFile &&
(line.startsWith(hunkHeaderPrefix) ||
(currentFile.isGitDiff && currentFile.oldName && currentFile.newName && !currentBlock))
2019-10-12 21:45:49 +00:00
) {
startBlock(line);
return;
}
/*
* There are three types of diff lines. These lines are defined by the way they start.
* 1. New line starts with: +
* 2. Old line starts with: -
* 3. Context line starts with: <SPACE>
*/
if (currentBlock && (line.startsWith("+") || line.startsWith("-") || line.startsWith(" "))) {
createLine(line);
return;
}
const doesNotExistHunkHeader = !existHunkHeader(line, lineIndex);
if (currentFile === null) {
throw new Error("Where is my file !!!");
}
/*
* Git diffs provide more information regarding files modes, renames, copies,
* commits between changes and similarity indexes
*/
if ((values = oldMode.exec(line))) {
currentFile.oldMode = values[1];
} else if ((values = newMode.exec(line))) {
currentFile.newMode = values[1];
} else if ((values = deletedFileMode.exec(line))) {
currentFile.deletedFileMode = values[1];
currentFile.isDeleted = true;
} else if ((values = newFileMode.exec(line))) {
currentFile.newFileMode = values[1];
currentFile.isNew = true;
} else if ((values = copyFrom.exec(line))) {
if (doesNotExistHunkHeader) {
currentFile.oldName = values[1];
}
currentFile.isCopy = true;
} else if ((values = copyTo.exec(line))) {
if (doesNotExistHunkHeader) {
currentFile.newName = values[1];
}
currentFile.isCopy = true;
} else if ((values = renameFrom.exec(line))) {
if (doesNotExistHunkHeader) {
currentFile.oldName = values[1];
}
currentFile.isRename = true;
} else if ((values = renameTo.exec(line))) {
if (doesNotExistHunkHeader) {
currentFile.newName = values[1];
}
currentFile.isRename = true;
} else if ((values = binaryFiles.exec(line))) {
currentFile.isBinary = true;
currentFile.oldName = getFilename(values[1], undefined, config.srcPrefix);
currentFile.newName = getFilename(values[2], undefined, config.dstPrefix);
startBlock("Binary file");
2019-10-13 18:21:19 +00:00
} else if (binaryDiff.test(line)) {
2019-10-12 21:45:49 +00:00
currentFile.isBinary = true;
startBlock(line);
} else if ((values = similarityIndex.exec(line))) {
currentFile.unchangedPercentage = parseInt(values[1], 10);
} else if ((values = dissimilarityIndex.exec(line))) {
currentFile.changedPercentage = parseInt(values[1], 10);
} else if ((values = index.exec(line))) {
currentFile.checksumBefore = values[1];
currentFile.checksumAfter = values[2];
values[3] && (currentFile.mode = values[3]);
} else if ((values = combinedIndex.exec(line))) {
currentFile.checksumBefore = [values[2], values[3]];
currentFile.checksumAfter = values[1];
} else if ((values = combinedMode.exec(line))) {
currentFile.oldMode = [values[2], values[3]];
currentFile.newMode = values[1];
} else if ((values = combinedNewFile.exec(line))) {
currentFile.newFileMode = values[1];
currentFile.isNew = true;
} else if ((values = combinedDeletedFile.exec(line))) {
currentFile.deletedFileMode = values[1];
currentFile.isDeleted = true;
}
});
saveBlock();
saveFile();
return files;
}