文字列JavaScriptを比較する％of可能性あり

Question

2つの文字列を比較し、それらが似ている可能性を返すことができるJavaScript関数を探しています。私はsoundexを見てきましたが、それは複数の単語の文字列や非名前にはあまり適していません。私は次のような関数を探しています：

function compare(strA,strB){ } compare("Apples","Apple") = Some X Percentage.

この関数は、数字、マルチワード値、名前など、すべてのタイプの文字列で機能します。おそらく私が使用できる簡単なアルゴリズムがありますか？

Ultimately none of these served my purpose so I used this: function compare(c, u) { var incept = false; var ca = c.split(","); u = clean(u); //ca = correct answer array (Collection of all correct answer) //caa = a single correct answer Word array (collection of words of a single correct answer) //u = array of user answer words cleaned using custom clean function for (var z = 0; z < ca.length; z++) { caa = $.trim(ca[z]).split(" "); var pc = 0; for (var x = 0; x < caa.length; x++) { for (var y = 0; y < u.length; y++) { if (soundex(u[y]) != null && soundex(caa[x]) != null) { if (soundex(u[y]) == soundex(caa[x])) { pc = pc + 1; } } else { if (u[y].indexOf(caa[x]) > -1) { pc = pc + 1; } } } } if ((pc / caa.length) > 0.5) { return true; } } return false; } // create object listing the SOUNDEX values for each letter // -1 indicates that the letter is not coded, but is used for coding // 0 indicates that the letter is omitted for modern census archives // but acts like -1 for older census archives // 1 is for BFPV // 2 is for CGJKQSXZ // 3 is for DT // 4 is for L // 5 is for MN my home state // 6 is for R function makesoundex() { this.a = -1 this.b = 1 this.c = 2 this.d = 3 this.e = -1 this.f = 1 this.g = 2 this.h = 0 this.i = -1 this.j = 2 this.k = 2 this.l = 4 this.m = 5 this.n = 5 this.o = -1 this.p = 1 this.q = 2 this.r = 6 this.s = 2 this.t = 3 this.u = -1 this.v = 1 this.w = 0 this.x = 2 this.y = -1 this.z = 2 } var sndx = new makesoundex() // check to see that the input is valid function isSurname(name) { if (name == "" || name == null) { return false } else { for (var i = 0; i < name.length; i++) { var letter = name.charAt(i) if (!(letter >= 'a' && letter <= 'z' || letter >= 'A' && letter <= 'Z')) { return false } } } return true } // Collapse out directly adjacent sounds // 1. Assume that surname.length>=1 // 2. Assume that surname contains only lowercase letters function collapse(surname) { if (surname.length == 1) { return surname } var right = collapse(surname.substring(1, surname.length)) if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) { return surname.charAt(0) + right.substring(1, right.length) } return surname.charAt(0) + right } // Collapse out directly adjacent sounds using the new National Archives method // 1. Assume that surname.length>=1 // 2. Assume that surname contains only lowercase letters // 3. H and W are completely ignored function omit(surname) { if (surname.length == 1) { return surname } var right = omit(surname.substring(1, surname.length)) if (!sndx[right.charAt(0)]) { return surname.charAt(0) + right.substring(1, right.length) } return surname.charAt(0) + right } // Output the coded sequence function output_sequence(seq) { var output = seq.charAt(0).toUpperCase() // Retain first letter output += "-" // Separate letter with a dash var stage2 = seq.substring(1, seq.length) var count = 0 for (var i = 0; i < stage2.length && count < 3; i++) { if (sndx[stage2.charAt(i)] > 0) { output += sndx[stage2.charAt(i)] count++ } } for (; count < 3; count++) { output += "0" } return output } // Compute the SOUNDEX code for the surname function soundex(value) { if (!isSurname(value)) { return null } var stage1 = collapse(value.toLowerCase()) //form.result.value=output_sequence(stage1); var stage1 = omit(value.toLowerCase()) var stage2 = collapse(stage1) return output_sequence(stage2); } function clean(u) { var u = u.replace(/\,/g, ""); u = u.toLowerCase().split(" "); var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"]; var n = []; for (var y = 0; y < u.length; y++) { var test = false; for (var z = 0; z < cw.length; z++) { if (u[y] != "" && u[y] != cw[z]) { test = true; break; } } if (test) { //Don't use & or $ in comparison var val = u[y].replace("$", "").replace("&", ""); n.Push(val); } } return n; }

overlord1234 · Answer

レーベンシュタイン距離に基づいた答えがあります https://en.wikipedia.org/wiki/Levenshtein_distance

function similarity(s1, s2) { var longer = s1; var shorter = s2; if (s1.length < s2.length) { longer = s2; shorter = s1; } var longerLength = longer.length; if (longerLength == 0) { return 1.0; } return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength); }

編集距離を計算するため

function editDistance(s1, s2) { s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); var costs = new Array(); for (var i = 0; i <= s1.length; i++) { var lastValue = i; for (var j = 0; j <= s2.length; j++) { if (i == 0) costs[j] = j; else { if (j > 0) { var newValue = costs[j - 1]; if (s1.charAt(i - 1) != s2.charAt(j - 1)) newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1; costs[j - 1] = lastValue; lastValue = newValue; } } } if (i > 0) costs[s2.length] = lastValue; } return costs[s2.length]; }

使用法

similarity('Stack Overflow','Stack Ovrflw')

0.8571428571428571を返します

以下でそれを試すことができます：

function checkSimilarity(){ var str1 = document.getElementById("lhsInput").value; var str2 = document.getElementById("rhsInput").value; document.getElementById("output").innerHTML = similarity(str1, str2); } function similarity(s1, s2) { var longer = s1; var shorter = s2; if (s1.length < s2.length) { longer = s2; shorter = s1; } var longerLength = longer.length; if (longerLength == 0) { return 1.0; } return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength); } function editDistance(s1, s2) { s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); var costs = new Array(); for (var i = 0; i <= s1.length; i++) { var lastValue = i; for (var j = 0; j <= s2.length; j++) { if (i == 0) costs[j] = j; else { if (j > 0) { var newValue = costs[j - 1]; if (s1.charAt(i - 1) != s2.charAt(j - 1)) newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1; costs[j - 1] = lastValue; lastValue = newValue; } } } if (i > 0) costs[s2.length] = lastValue; } return costs[s2.length]; }

<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div> <div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div> <div>Match: <span id="output">No Input</span></div>

jmort253 · Answer

以下は、比較を行い、同等に基づいてパーセンテージを返す非常に単純な関数です。すべての可能なシナリオでテストされているわけではありませんが、開始に役立つ場合があります。

function similar(a,b) { var lengthA = a.length; var lengthB = b.length; var equivalency = 0; var minLength = (a.length > b.length) ? b.length : a.length; var maxLength = (a.length < b.length) ? b.length : a.length; for(var i = 0; i < minLength; i++) { if(a[i] == b[i]) { equivalency++; } } var weight = equivalency / maxLength; return (weight * 100) + "%"; } alert(similar("test","tes")); // 75% alert(similar("test","test")); // 100% alert(similar("test","testt")); // 80% alert(similar("test","tess")); // 75%

VisioN · Answer

関数similar_text from PHP.jsライブラリ？

PHP関数と同じ名前に基づいています。

function similar_text (first, second) { // Calculates the similarity between two strings // discuss at: http://phpjs.org/functions/similar_text if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') { return 0; } first += ''; second += ''; var pos1 = 0, pos2 = 0, max = 0, firstLength = first.length, secondLength = second.length, p, q, l, sum; max = 0; for (p = 0; p < firstLength; p++) { for (q = 0; q < secondLength; q++) { for (l = 0; (p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++); if (l > max) { max = l; pos1 = p; pos2 = q; } } } sum = max; if (sum) { if (pos1 && pos2) { sum += this.similar_text(first.substr(0, pos2), second.substr(0, pos2)); } if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) { sum += this.similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max)); } } return sum; }

Paulpro · Answer

あなたの目的には十分かもしれないとすぐに書いたものです：

function Compare(strA,strB){ for(var result = 0, i = strA.length; i--;){ if(typeof strB[i] == 'undefined' || strA[i] == strB[i]); else if(strA[i].toLowerCase() == strB[i].toLowerCase()) result++; else result += 4; } return 1 - (result + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length)); }

これは、完全に異なるか欠落している文字と同じように、同じですが異なるケース1四半期の文字の重さを量ります。 0〜1の数値を返します。1は文字列が同一であることを意味します。 0は、類似性がないことを意味します。例：

Compare("Apple", "Apple") // 1 Compare("Apples", "Apple") // 0.8181818181818181 Compare("Apples", "Apple") // 0.7727272727272727 Compare("a", "A") // 0.75 Compare("Apples", "appppp") // 0.45833333333333337 Compare("a", "b") // 0

Tushar Walzade · Answer

文字列の類似性のために this ライブラリを使用すると、私にとって魅力的でした！

ここに例があります-

var similarity = stringSimilarity.compareTwoStrings("Apples","Apple"); // => 0.88

Octavio D&#237;az · Answer

fuzzyset -JavaScript用のファジー文字列セット。 fuzzysetは、データに対して全文検索に類似した何かを実行し、ミススペルの可能性と近似文字列マッチングを決定するデータ構造です。これは、pythonライブラリのjavascriptポートです。