Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added text difference finder #25

Merged
merged 4 commits into from
Nov 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions editdojo_project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,8 @@
# https://docs.djangoproject.com/en/2.1/howto/static-files/

STATIC_URL = '/static/'

# Directories listed for static-file lookup (see the Django static-files docs
# linked above): the project's own "static" folder under BASE_DIR, plus the
# absolute path /var/www/static/.
# NOTE(review): '/var/www/static/' is a hard-coded absolute path - confirm it
# exists in every environment this settings module is used in.
STATICFILES_DIRS = [
os.path.join(BASE_DIR, "static"),
'/var/www/static/',
]
6 changes: 6 additions & 0 deletions static/css/text_difference.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/* Background for text removed from the original (pale red). */
.deleted {
  background-color: #ffc8c8;
}

/* Background for text added in the changed version (pale green). */
.added {
  background-color: #c8ffc8;
}
45 changes: 45 additions & 0 deletions static/demo.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, just curious, where did you get all these meta tags? Maybe you used a template?

<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css">
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting that you have Materialize here. Why not Bootstrap? Or do people use them at the same time usually?

Copy link
Contributor Author

@franktzheng franktzheng Nov 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can change it to split by character, but I think for English it might be better just to split by words. Maybe we can split by words for alphabet-based languages like English and split by character for character-based languages like Japanese.

Also, the metatags are from the default template created by VSCode (text editor).

I used Materialize just for the demo since I like it a bit better but you can definitely change that.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that might be a good idea. People might write mixed things though, so that might be more tricky.

Okay cool, Materialize sounds good then!

<link type="text/css" rel="stylesheet" href="css/text_difference.css">
</head>

<body>
<div class="container">
  <h1>text difference finder</h1>
  <!--
    The form is never sent to a server: the page's inline script intercepts
    the submit event and renders the result into the paragraphs below.
    The previous method="submit" was not a valid form method, so it is
    omitted here.
  -->
  <form id="textForm">
    <!-- Each input carries an id so that its label's "for" attribute actually binds to it. -->
    <label for="original">Original Text</label>
    <input id="original" name="original" type="text">
    <label for="changed">Changed Text</label>
    <input id="changed" name="changed" type="text">
    <button class="btn waves-effect waves-light" type="submit" name="action">Submit</button>
  </form>
  <!-- Output targets filled in by the submit handler. -->
  <p id="difference"></p>
  <p id="deletions"></p>
  <p id="additions"></p>
</div>

<script src="js/text_difference.js"></script>
<script>
// Wire up the demo form: on submit, diff the two inputs and render the three
// resulting HTML fragments into their output paragraphs.
const form = document.getElementById("textForm");

form.addEventListener("submit", (submitEvent) => {
  // Stay on the page; the diff is computed entirely in the browser.
  submitEvent.preventDefault();

  const fields = new FormData(form);
  const [dif, del, add] = findTextDifference(
    fields.get("original"),
    fields.get("changed")
  );

  // [target element id, heading markup, diff markup]
  const sections = [
    ["difference", "<b>Total Difference</b><br>", dif],
    ["deletions", "<b>Deletions</b><br>", del],
    ["additions", "<b>Additions</b><br>", add],
  ];
  for (const [targetId, heading, markup] of sections) {
    document.getElementById(targetId).innerHTML = heading + markup;
  }
});
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js"></script>
</body>

</html>
101 changes: 101 additions & 0 deletions static/js/text_difference.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/**
* Text difference module
* Relevant functions:
* @method longestCommonSubsequence - used to find common phrases between 2 pieces of text.
* @method getDifferenceStrings - used to construct a string to display in HTML highlighting differences.
*/

/**
 * Computes the word-level difference between two pieces of text.
 *
 * Each text is split on single space characters, the longest common
 * subsequence of the two word lists is computed, and the result is handed to
 * getDifferenceStrings for rendering.
 *
 * @param {string} originalText - The text before the change.
 * @param {string} changedText - The text after the change.
 * @returns {string[]} The value returned by getDifferenceStrings: three
 *     strings (total difference, deletions, additions).
 */
function findTextDifference(originalText, changedText) {
  const [originalWords, changedWords] = [originalText, changedText].map(
    (text) => text.split(" ")
  );

  return getDifferenceStrings(
    longestCommonSubsequence(originalWords, changedWords),
    originalWords,
    changedWords
  );
}

// Dynamic programming longest common subsequence algorithm.
//
// Memo table shared by longestCommonSubsequence and lcs: arr[i][j] caches the
// longest common subsequence of the first i items of `a` and the first j items
// of `b`. It is rebuilt on every call to longestCommonSubsequence.
let arr = undefined;

/**
 * Finds a longest common subsequence of two lists.
 *
 * @param {Array} a - First list (e.g. the words of the original text).
 * @param {Array} b - Second list (e.g. the words of the changed text).
 * @returns {Array} Items, in order, that appear in both lists; empty when
 *     either list is empty or nothing is shared.
 */
function longestCommonSubsequence(a, b) {
  const m = a.length;
  const n = b.length;
  // (m + 1) x (n + 1) table; an undefined cell means "not computed yet".
  arr = Array.from({ length: m + 1 }, () => new Array(n + 1));
  return lcs(a, b, m, n);
}

/**
 * Memoized recursive step: longest common subsequence of a[0..m) and b[0..n).
 * Expects `arr` to have been sized by longestCommonSubsequence.
 *
 * @param {Array} a - First list.
 * @param {Array} b - Second list.
 * @param {number} m - Number of leading items of `a` to consider.
 * @param {number} n - Number of leading items of `b` to consider.
 * @returns {Array} A longest common subsequence of the two prefixes.
 */
function lcs(a, b, m, n) {
  if (arr[m][n] !== undefined) {
    return arr[m][n];
  }

  let result;
  if (m === 0 || n === 0) {
    result = [];
  } else if (a[m - 1] === b[n - 1]) {
    // Last items match: extend the best answer for the shorter prefixes.
    result = [...lcs(a, b, m - 1, n - 1), a[m - 1]];
  } else {
    const lcs1 = lcs(a, b, m - 1, n);
    const lcs2 = lcs(a, b, m, n - 1);
    // Compare by length. Comparing the arrays directly (lcs1 > lcs2) coerces
    // them to strings and orders them lexicographically, which can pick the
    // shorter candidate.
    result = lcs1.length > lcs2.length ? lcs1 : lcs2;
  }

  arr[m][n] = result;
  return result;
}

/**
 * Constructs 3 HTML strings that highlight the differences (deletions and
 * additions) between 2 texts.
 *
 * For every common phrase, the words skipped in the original list since the
 * previous common phrase are emitted as "deleted", the words skipped in the
 * changed list as "added", and the phrase itself as "unchanged". A final pass
 * emits whatever trails the last common phrase.
 *
 * Words are HTML-escaped because the result is meant to be inserted as markup.
 * The input arrays are not modified.
 *
 * @param {string[]} commonPhrases - Common subsequence of the two word lists.
 * @param {string[]} originalWords - Words of the original text.
 * @param {string[]} changedWords - Words of the changed text.
 * @returns {string[]} [difference, deletions, additions]: markup showing both
 *     kinds of change, only deletions, and only additions respectively.
 */
function getDifferenceStrings(commonPhrases, originalWords, changedWords) {
  // Escapes the characters that are significant in HTML text.
  const escapeHtml = (text) =>
    String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Renders words[start, end) as escaped text, each word followed by a space.
  const renderWords = (words, start, end) =>
    words
      .slice(start, end)
      .map((word) => escapeHtml(word) + " ")
      .join("");

  const span = (className, content) =>
    `<span class="${className}">${content}</span>`;

  let differenceStr = "";
  let deletionsStr = "";
  let additionsStr = "";

  // Index of the first not-yet-consumed word in each list. Tracking a cursor
  // replaces the in-place splice() calls that used to empty the caller's arrays.
  let cursorO = 0;
  let cursorC = 0;

  for (let i = 0; i <= commonPhrases.length; i++) {
    const isTail = i === commonPhrases.length;
    const phrase = isTail ? "" : commonPhrases[i];

    // On the tail pass everything that is left counts as changed.
    const phraseIdxO = isTail
      ? originalWords.length
      : originalWords.indexOf(phrase, cursorO);
    const phraseIdxC = isTail
      ? changedWords.length
      : changedWords.indexOf(phrase, cursorC);

    // A phrase that cannot be found (-1) consumes nothing from that list.
    let deletedStr = "";
    if (phraseIdxO !== -1) {
      deletedStr = renderWords(originalWords, cursorO, phraseIdxO);
      cursorO = phraseIdxO + 1;
    }
    let addedStr = "";
    if (phraseIdxC !== -1) {
      addedStr = renderWords(changedWords, cursorC, phraseIdxC);
      cursorC = phraseIdxC + 1;
    }

    const deleted = span("deleted", deletedStr);
    const added = span("added", addedStr);
    const unchanged = span("unchanged", escapeHtml(phrase));

    differenceStr += `\n${deleted}\n${added}\n${unchanged}\n`;
    deletionsStr += `\n${deleted}\n${unchanged}\n`;
    additionsStr += `\n${added}\n${unchanged}\n`;
  }

  return [differenceStr, deletionsStr, additionsStr];
}

/**
 * Turns the words in wordlist[start, end) into a single string.
 *
 * @param {Array} wordlist - List of words to take a range from.
 * @param {number} start - Index of the first word to include.
 * @param {number} end - Index just past the last word to include.
 * @returns {string} The selected words as formatted by arrToString, or an
 *     empty string when `end` is less than `start`.
 */
function substringFromWordlist(wordlist, start, end) {
  return end < start ? "" : arrToString(wordlist.slice(start, end));
}

/**
 * Concatenates the items of an array, appending a single space after each one
 * (so a non-empty result always ends with a space).
 *
 * @param {Array} arr - Items to concatenate.
 * @returns {string} The concatenated string; "" for an empty array.
 */
function arrToString(arr) {
  // Declared locally: the previous bare `str = ""` created an implicit global
  // and throws a ReferenceError in strict mode / ES modules.
  let result = "";
  for (const item of arr) {
    result += item + " ";
  }
  return result;
}