Fuzzy String Compare - Compare two strings and return a %age confidence that they are the same.
Typical Usage:
- Save the Module Code into a module called something like
modStringFunctions.
- In the code debug window try these:
Function Call
Result (%age)
?fnCompare("Karen Chan","Karen Chan") --> 100
?fnCompare("Karen Chan","Karen Change") --> 90.9
?fnCompare("Karen Chan","Karin Chin") --> 77.8
?fnCompare("Karen Chan","Kareen E Chan") --> 78.2
For your system, you need to decide what %age constitutes equality based on trials.
Module Code
=========================================================================================================
Public Function fnCompare(S1
As Variant, S2 As Variant) As Single
'---- Accepts two strings
for comparison and returns a Similarity percentage.
'---- Remove common words from both strings.
S1 = fnRemoveCommon(S1)
S2 = fnRemoveCommon(S2)
'---- Accommodate null
surname and first name.
If Len(S1) = 0 Then S1 =
" "
If Len(S2) = 0 Then S2 = " "
'---- Compare strings
both ways (first to second then second to first!) and average.
fnCompare = (fnComp2(S1, S2) +
fnComp2(S2, S1)) / 2
End Function
----------------------------------------------------------------------------------------------
Public Function
fnComp2(varS1 As Variant, varS2 As Variant) As Single
Dim intLS1 As Integer, intLS2 As
Integer 'lengths of
two strings
Dim intC As Integer, strC As String
'char position in first string
and char at that position
Dim intC2 As Integer, strC2 As String
'char position in second string
and char at that position
Dim intP As Integer
'highest percentage correlation between chars in first and second strings.
Dim intStart As Integer, intEnd As
Integer 'char range to check in
second string.
Dim intPoss As Integer
'possible percentage correlation
Dim intPTotal As Integer
'total percentage correlation
'---- Establish basic
parameters
intLS1 = Len(varS1)
intLS2 = Len(varS2)
'---- Length of strings
must be within 30% of each other.
If intLS1 /
fnNZTo1(intLS2) >= 0.7 And intLS1 / fnNZTo1(intLS2) <= 1.3 Then
'---- Scroll
through all letters of first string.
For intC = 1
To intLS1
strC = Mid(varS1, intC, 1)
'---- Establish range of chars to check in second string.
intStart = intC - 3: If intStart < 1
Then intStart = 1
intEnd = intC + 3: If intEnd >
intLS2 Then intEnd = intLS2
'---- Scroll through range of chars of second string assigning %ages - keep
highest percentage.
intP = 0
For intC2 = intStart To intEnd
strC2 = Mid(varS2, intC2, 1)
If strC2 = strC Then
intPoss = 100 - Abs(intC2 - intC)
* 20
If intPoss > intP Then intP =
intPoss
End If
Next intC2
intPTotal = intPTotal + intP
Next intC
fnComp2 = intPTotal / intLS1
Else
fnComp2 = 0
End If
End Function
----------------------------------------------------------------------------------------------
Private Function fnRemoveCommon(varS As Variant) As String
If Not IsNull(varS) Then
'---- Remove
punctuation and common words from both strings. Modify/add lines as required.
varS =
Replace(varS, " ", "")
varS = Replace(varS, "'", "")
varS = Replace(varS, ".", "")
varS = Replace(varS, ":", "")
varS = Replace(varS, "-", "")
varS = Replace(varS, "Street", "")
varS = Replace(varS, "Road", "")
varS = Replace(varS, "Place", "")
varS = Replace(varS, "Terrace", "")
varS = Replace(varS, "Avenue", "")
varS = Replace(varS, "Drive", "")
varS = Replace(varS, "Court", "")
varS = Replace(varS, "Close", "")
varS = Replace(varS, "Lane", "")
varS = Replace(varS, "Crescent", "")
varS = Replace(varS, "Grove", "")
varS = Replace(varS, "POBox", "")
varS = Replace(varS, "Highway", "")
varS = Replace(varS, "Parade", "")
Else
varS = " "
End If
fnRemoveCommon = varS
End Function
----------------------------------------------------------------------------------------------
Public Function fnNZTo1(numVal As Variant) As Variant
'---- Returns 1 for 0 and
null values.
If numVal = 0 Or
IsNull(numVal) Then
fnNZTo1 = 1
Else
fnNZTo1 = numVal
End If
End Function
=========================================================================================================
Back to ...
Code Library Menu