-
Notifications
You must be signed in to change notification settings - Fork 98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added text difference finder #25
Changes from all commits
a98361c
e3c07a4
f1fad8e
93f1680
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
/* Light red background for spans the diff script marks with class "deleted". */
.deleted { | ||
background-color: rgb(255, 200, 200); | ||
} | ||
/* Light green background for spans the diff script marks with class "added". */
.added { | ||
background-color: rgb(200, 255, 200); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
|
||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge"> | ||
<title>Document</title> | ||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css"> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting that you have Materialize here. Why not Bootstrap? Or do people use them at the same time usually? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I can change it to split by character, but I think for English it might be better just to split by words. Maybe we can split by words for alphabet-based languages like English and split by character for character-based languages like Japanese. Also, the metatags are from the default template created by VSCode (text editor). I used Materialize just for the demo since I like it a bit better but you can definitely change that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that might be a good idea. People might write mixed things though, so that might be more tricky. Okay cool, Materialize sounds good then! |
||
<link type="text/css" rel="stylesheet" href="css/text_difference.css"> | ||
</head> | ||
|
||
<body> | ||
<div class="container"> | ||
<h1>text difference finder</h1> | ||
<!--
  Input form for the two texts to compare. The page script reads the fields by
  their `name` through FormData and cancels the native submission, so no
  `method` attribute is needed ("submit" is not a valid method value).
  Each input carries an `id` because a label's `for` attribute matches ids,
  not names; without it the labels are not associated with their inputs.
-->
<form id="textForm">
    <label for="original">Original Text</label>
    <input id="original" name="original" type="text">
    <label for="changed">Changed Text</label>
    <input id="changed" name="changed" type="text">
    <button class="btn waves-effect waves-light" type="submit" name="action">Submit</button>
</form>
<p id="difference"></p> | ||
<p id="deletions"></p> | ||
<p id="additions"></p> | ||
</div> | ||
|
||
<script src="js/text_difference.js"></script> | ||
<script> | ||
// Wire the comparison form: on submit, diff the two inputs and render the
// three HTML fragments returned by findTextDifference into their paragraphs.
let form = document.getElementById("textForm");

form.addEventListener("submit", function handleSubmit(event) {
    // Keep the browser on the page; the result is rendered client-side.
    event.preventDefault();

    const fields = new FormData(form);
    const originalText = fields.get("original");
    const changedText = fields.get("changed");
    const [dif, del, add] = findTextDifference(originalText, changedText);

    // [target element id, heading markup, fragment to show under the heading]
    const sections = [
        ["difference", "<b>Total Difference</b><br>", dif],
        ["deletions", "<b>Deletions</b><br>", del],
        ["additions", "<b>Additions</b><br>", add],
    ];
    for (const [targetId, heading, fragment] of sections) {
        document.getElementById(targetId).innerHTML = heading + fragment;
    }
});
</script> | ||
<script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js"></script> | ||
</body> | ||
|
||
</html> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/** | ||
* Text difference module | ||
* Relevant functions: | ||
* @method longestCommonSubsequence - used to find common phrases between 2 pieces of text. | ||
* @method getDifferenceStrings - used to construct the strings to display in HTML, highlighting differences. | ||
*/ | ||
|
||
/**
 * Compares two pieces of text word by word.
 *
 * Both texts are split on single space characters, the longest common
 * subsequence of the resulting word lists is computed, and the word lists are
 * handed to getDifferenceStrings together with that subsequence.
 *
 * @param {string} originalText - Text before the change.
 * @param {string} changedText - Text after the change.
 * @returns {string[]} Whatever getDifferenceStrings returns: the combined,
 *     deletions-only and additions-only HTML fragments, in that order.
 */
function findTextDifference(originalText, changedText) {
    const [originalWords, changedWords] = [originalText, changedText].map(
        (text) => text.split(" ")
    );

    return getDifferenceStrings(
        longestCommonSubsequence(originalWords, changedWords),
        originalWords,
        changedWords
    );
}
|
||
// Dynamic programming longest common subsequence algorithm.

// Memo table shared by longestCommonSubsequence and lcs: arr[i][j] caches the
// longest common subsequence of the first i items of `a` and the first j items
// of `b`. It is rebuilt on every longestCommonSubsequence call.
let arr = undefined;

/**
 * Finds a longest common subsequence of two lists (items compared with ===).
 *
 * @param {Array} a - First list, e.g. the words of the original text.
 * @param {Array} b - Second list, e.g. the words of the changed text.
 * @returns {Array} The common items, in the order they appear in both lists.
 */
function longestCommonSubsequence(a, b) {
    const m = a.length;
    const n = b.length;
    // Fresh (m + 1) x (n + 1) table of undefined slots so that results cached
    // for a previous pair of lists are never reused.
    arr = Array.from({ length: m + 1 }, () => Array.from({ length: n + 1 }));
    return lcs(a, b, m, n);
}

/**
 * Memoized recursive step: longest common subsequence of a[0..m) and b[0..n).
 *
 * Relies on the module-level `arr` table having been sized for these lists by
 * longestCommonSubsequence.
 *
 * @param {Array} a - First list.
 * @param {Array} b - Second list.
 * @param {number} m - Number of leading items of `a` to consider.
 * @param {number} n - Number of leading items of `b` to consider.
 * @returns {Array} A longest common subsequence of the two prefixes.
 */
function lcs(a, b, m, n) {
    if (arr[m][n] !== undefined) {
        return arr[m][n];
    }

    let result;
    if (m === 0 || n === 0) {
        result = [];
    } else if (a[m - 1] === b[n - 1]) {
        result = lcs(a, b, m - 1, n - 1).concat(a[m - 1]);
    } else {
        const lcs1 = lcs(a, b, m - 1, n);
        const lcs2 = lcs(a, b, m, n - 1);
        // Compare by length. Comparing the arrays themselves with `>` coerces
        // them to strings and picks the lexicographically larger one, which
        // can be the shorter subsequence.
        result = lcs1.length > lcs2.length ? lcs1 : lcs2;
    }

    arr[m][n] = result;
    return result;
}
|
||
/**
 * Constructs 3 HTML strings that highlight the differences (deletions and
 * additions) between 2 texts.
 *
 * The word lists are walked in step with `commonPhrases`: the words that
 * precede each common phrase in the original list are wrapped in a "deleted"
 * span, those that precede it in the changed list in an "added" span, and the
 * phrase itself in an "unchanged" span. A final pass emits the words left
 * after the last common phrase. Every deleted or added word is followed by a
 * single space.
 *
 * The input arrays are not modified. All words are HTML-escaped because they
 * originate from user input and the result is meant to be inserted as markup.
 *
 * @param {string[]} commonPhrases - Common subsequence of the two word lists.
 * @param {string[]} originalWords - Words of the original text.
 * @param {string[]} changedWords - Words of the changed text.
 * @returns {string[]} `[differenceStr, deletionsStr, additionsStr]`: combined
 *     view, deletions-only view and additions-only view.
 */
function getDifferenceStrings(commonPhrases, originalWords, changedWords) {
    // Neutralize markup characters so a word such as "<b>" shows up as text.
    const escapeHtml = (text) =>
        String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");

    // Renders a run of words, each one followed by a single space.
    const renderWords = (words) =>
        words.map((word) => `${escapeHtml(word)} `).join("");

    let differenceStr = "";
    let deletionsStr = "";
    let additionsStr = "";

    // Cursors into the word lists; used instead of splicing consumed words off
    // the front, which would destroy the caller's arrays.
    let originalPos = 0;
    let changedPos = 0;

    // One iteration per common phrase plus a final one for the trailing words.
    for (let i = 0; i <= commonPhrases.length; i++) {
        const isTail = i === commonPhrases.length;
        const phrase = isTail ? "" : commonPhrases[i];

        const foundO = isTail
            ? originalWords.length
            : originalWords.indexOf(phrase, originalPos);
        const foundC = isTail
            ? changedWords.length
            : changedWords.indexOf(phrase, changedPos);

        // A phrase that cannot be found yields an empty gap and leaves the
        // cursor where it is.
        const gapEndO = foundO === -1 ? originalPos : foundO;
        const gapEndC = foundC === -1 ? changedPos : foundC;

        const deletedStr = renderWords(originalWords.slice(originalPos, gapEndO));
        const addedStr = renderWords(changedWords.slice(changedPos, gapEndC));
        const unchangedStr = escapeHtml(phrase);

        differenceStr += `
        <span class="deleted">${deletedStr}</span>
        <span class="added">${addedStr}</span>
        <span class="unchanged">${unchangedStr}</span>
        `;
        deletionsStr += `
        <span class="deleted">${deletedStr}</span>
        <span class="unchanged">${unchangedStr}</span>
        `;
        additionsStr += `
        <span class="added">${addedStr}</span>
        <span class="unchanged">${unchangedStr}</span>
        `;

        // Step past the matched phrase in each list.
        if (foundO !== -1) {
            originalPos = foundO + 1;
        }
        if (foundC !== -1) {
            changedPos = foundC + 1;
        }
    }

    return [differenceStr, deletionsStr, additionsStr];
}
|
||
/**
 * Joins the words in `wordlist[start, end)` into one string via arrToString.
 *
 * @param {string[]} wordlist - Source words.
 * @param {number} start - Index of the first word to include.
 * @param {number} end - Index just past the last word to include.
 * @returns {string} The joined words, or "" when `end` is smaller than `start`.
 */
function substringFromWordlist(wordlist, start, end) {
    const isInvertedRange = end < start;
    return isInvertedRange ? "" : arrToString(wordlist.slice(start, end));
}
|
||
/**
 * Concatenates the items of an array, appending one space after every item
 * (so a non-empty result ends with a trailing space).
 *
 * @param {Array} arr - Items to join, typically words.
 * @returns {string} The items joined as described; "" for an empty array.
 */
function arrToString(arr) {
    // Declared locally: assigning to an undeclared `str` leaked a global and
    // throws a ReferenceError in strict mode / ES modules.
    let joined = "";
    for (const item of arr) {
        joined += `${item} `;
    }
    return joined;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, just curious, where did you get all these meta tags? Maybe you used a template?