# include # include # include # include using namespace std; # include "levenshtein.hpp" //****************************************************************************80 int main ( ) //****************************************************************************80 // // Purpose: // // MAIN is the main program for LEVENSHTEIN_TEST. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 19 March 2018 // // Author: // // John Burkardt // { timestamp ( ); cout << "\n"; cout << "LEVENSHTEIN_TEST\n"; cout << " C++ version\n"; cout << " Test the LEVENSHTEIN library.\n"; levenshtein_distance_test ( ); levenshtein_matrix_test ( ); // // Terminate. // cout << "\n"; cout << "LEVENSHTEIN_TEST\n"; cout << " Normal end of execution.\n"; cout << "\n"; timestamp ( ); return 0; } //****************************************************************************80 int i4_min ( int i1, int i2 ) //****************************************************************************80 // // Purpose: // // I4_MIN returns the minimum of two I4's. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 13 October 1998 // // Author: // // John Burkardt // // Parameters: // // Input, int I1, I2, two integers to be compared. // // Output, int I4_MIN, the smaller of I1 and I2. // { int value; if ( i1 < i2 ) { value = i1; } else { value = i2; } return value; } //****************************************************************************80 int levenshtein_distance ( int m, string s, int n, string t ) //****************************************************************************80 // // Purpose: // // LEVENSHTEIN_DISTANCE computes the Levenshtein distance between strings. // // Discussion: // // Let S and T be source and target strings. Consider the task of // converting S to T in the minimal number of steps, involving // * Insertion: insert a new character // * Deletion: delete a character // * Substitution: swap one character for another. // The Levenshtein distance from S to T is the minimal number of such // steps required to transform S into T. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 19 March 2018 // // Author: // // John Burkardt // // Parameters: // // Input, int M, the length of string S. // // Input, string S, the first string. // // Input, int N, the length of string T. // // Input, string T, the second string. // // Output, int LEVENSHTEIN_DISTANCE, the Levenshtein distance between the // two strings. // { int *d; int distance; int i; int j; int substitution_cost; d = new int[(m+1)*(n+1)]; d[0+0*(m+1)] = 0; // // Source prefixes can be transformed into empty string by // dropping all characters, // for ( i = 1; i <= m; i++ ) { d[i+0*(m+1)] = i; } // // Target prefixes can be reached from empty source prefix // by inserting every character. // for ( j = 1; j <= n; j++ ) { d[0+j*(m+1)] = j; } for ( j = 1; j <= n; j++ ) { for ( i = 1; i <= m; i++ ) { if ( s[i-1] == t[j-1] ) { substitution_cost = 0; } else { substitution_cost = 1; } d[i+j*(m+1)] = i4_min ( d[i-1+j*(m+1)] + 1, i4_min ( d[i+(j-1)*(m+1)] + 1, d[i-1+(j-1)*(m+1)] + substitution_cost ) ); } } distance = d[m+n*(m+1)]; delete [] d; return distance; } //****************************************************************************80 void levenshtein_distance_test ( ) //****************************************************************************80 // // Purpose: // // LEVENSHTEIN_DISTANCE_TEST tests LEVENSHTEIN_DISTANCE. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 19 March 2018 // // Author: // // John Burkardt // { int d1; int d2; int m; int n; string s1 = "water"; string s2 = "kitten"; string s3 = "saturday"; string s4 = "pheromones"; string t1 = "wine"; string t2 = "sitting"; string t3 = "sunday"; string t4 = "photographer"; cout << "\n"; cout << "LEVENSHTEIN_DISTANCE_TEST:\n"; cout << " LEVENSHTEIN_DISTANCE computes the Levenshtein distance\n"; cout << " between two strings.\n"; m = s1.length ( ); n = t1.length ( ); d1 = levenshtein_distance ( m, s1, n, t1 ); d2 = 3; cout << "\n"; cout << " S = '" << s1 << "'\n"; cout << " T = '" << t1 << "'\n"; cout << " Computed distance = " << d1 << "\n"; cout << " Correct distance = " << d2 << "\n"; m = s2.length ( ); n = t2.length ( ); d1 = levenshtein_distance ( m, s2, n, t2 ); d2 = 3; cout << "\n"; cout << " S = '" << s2 << "'\n"; cout << " T = '" << t2 << "'\n"; cout << " Computed distance = " << d1 << "\n"; cout << " Correct distance = " << d2 << "\n"; m = s3.length ( ); n = t3.length ( ); d1 = levenshtein_distance ( m, s3, n, t3 ); d2 = 3; cout << "\n"; cout << " S = '" << s3 << "'\n"; cout << " T = '" << t3 << "'\n"; cout << " Computed distance = " << d1 << "\n"; cout << " Correct distance = " << d2 << "\n"; m = s4.length ( ); n = t4.length ( ); d1 = levenshtein_distance ( m, s4, n, t4 ); d2 = 8; cout << "\n"; cout << " S = '" << s4 << "'\n"; cout << " T = '" << t4 << "'\n"; cout << " Computed distance = " << d1 << "\n"; cout << " Correct distance = " << d2 << "\n"; return; } //****************************************************************************80 int *levenshtein_matrix ( int m, string s, int n, string t ) //****************************************************************************80 // // Purpose: // // LEVENSHTEIN_MATRIX computes the Levenshtein distance matrix between strings. // // Discussion: // // Let S and T be source and target strings. Consider the task of // converting S to T in the minimal number of steps, involving // * Insertion: insert a new character // * Deletion: delete a character // * Substitution: swap one character for another. // The Levenshtein distance from S to T is the minimal number of such // steps required to transform S into T. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 18 March 2018 // // Author: // // John Burkardt // // Parameters: // // Parameters: // // Input, int M, the length of string S. // // Input, string S, the first string. // // Input, int N, the length of string T. // // Input, string T, the second string. // // Output, int LEVENSHTEIN_MATRIX[(M+1)*(N+1)], the Levenshtein matrix. // { int *d; int i; int j; int substitution_cost; d = new int[(m+1)*(n+1)]; d[0+0*(m+1)] = 0; // // Source prefixes can be transformed into empty string by // dropping all characters, // for ( i = 1; i <= m; i++ ) { d[i+0*(m+1)] = i; } // // Target prefixes can be reached from empty source prefix // by inserting every character. // for ( j = 1; j <= n; j++ ) { d[0+j*(m+1)] = j; } for ( j = 1; j <= n; j++ ) { for ( i = 1; i <= m; i++ ) { if ( s[i-1] == t[j-1] ) { substitution_cost = 0; } else { substitution_cost = 1; } d[i+j*(m+1)] = i4_min ( d[i-1+j*(m+1)] + 1, i4_min ( d[i+(j-1)*(m+1)] + 1, d[i-1+(j-1)*(m+1)] + substitution_cost ) ); } } return d; } //****************************************************************************80 void levenshtein_matrix_test ( ) //****************************************************************************80 // // Purpose: // // - LEVENSHTEIN_MATRIX_TEST tests LEVENSHTEIN_MATRIX. // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 19 March 2018 // // Author: // // John Burkardt // { int *d; int i; int j; int m; int n; string s1 = "water"; string s2 = "kitten"; string s3 = "saturday"; string s4 = "pheromones"; string t1 = "wine"; string t2 = "sitting"; string t3 = "sunday"; string t4 = "photographer"; cout << "\n"; cout << "LEVENSHTEIN_MATRIX_TEST:\n"; cout << " LEVENSHTEIN_MATRIX computes the Levenshtein matrix\n"; cout << " associated with the computation of the Levenshtein\n"; cout << " distance between two strings.\n"; m = s1.length ( ); n = t1.length ( ); d = levenshtein_matrix ( m, s1, n, t1 ); cout << "\n"; cout << " S = '" << s1 << "'\n"; cout << " T = '" << t1 << "'\n"; for ( i = 0; i <= m; i++ ) { for ( j = 0; j <= n; j++ ) { cout << " " << setw(2) << d[i+j*(m+1)]; } cout << "\n"; } delete [] d; m = s2.length ( ); n = t2.length ( ); d = levenshtein_matrix ( m, s2, n, t2 ); cout << "\n"; cout << " S = '" << s2 << "'\n"; cout << " T = '" << t2 << "'\n"; for ( i = 0; i <= m; i++ ) { for ( j = 0; j <= n; j++ ) { cout << " " << setw(2) << d[i+j*(m+1)]; } cout << "\n"; } delete [] d; m = s3.length ( ); n = t3.length ( ); d = levenshtein_matrix ( m, s3, n, t3 ); cout << "\n"; cout << " S = '" << s3 << "'\n"; cout << " T = '" << t3 << "'\n"; for ( i = 0; i <= m; i++ ) { for ( j = 0; j <= n; j++ ) { cout << " " << setw(2) << d[i+j*(m+1)]; } cout << "\n"; } delete [] d; m = s4.length ( ); n = t4.length ( ); d = levenshtein_matrix ( m, s4, n, t4 ); cout << "\n"; cout << " S = '" << s4 << "'\n"; cout << " T = '" << t4 << "'\n"; for ( i = 0; i <= m; i++ ) { for ( j = 0; j <= n; j++ ) { cout << " " << setw(2) << d[i+j*(m+1)]; } cout << "\n"; } delete [] d; return; } //****************************************************************************80 void timestamp ( ) //****************************************************************************80 // // Purpose: // // TIMESTAMP prints the current YMDHMS date as a time stamp. // // Example: // // 31 May 2001 09:45:54 AM // // Licensing: // // This code is distributed under the GNU LGPL license. // // Modified: // // 08 July 2009 // // Author: // // John Burkardt // // Parameters: // // None // { # define TIME_SIZE 40 static char time_buffer[TIME_SIZE]; const struct std::tm *tm_ptr; std::time_t now; now = std::time ( NULL ); tm_ptr = std::localtime ( &now ); std::strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm_ptr ); std::cout << time_buffer << "\n"; return; # undef TIME_SIZE }