PHP 判断两个中文字符串的相似度,以 0~1 之间的小数(保留两位,例如 0.94 即 94%)输出相似度。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
<?php
/**
 * Similarity of two strings, measured by their longest common subsequence (LCS).
 *
 * @author ls
 * @email lmt@lostphp.com
 * @date 2022-02-23 04:56:12
 * @desc Computes how similar two strings are
 */
class LCS
{
    /** @var string First operand of the most recent comparison. */
    public $str1;

    /** @var string Second operand of the most recent comparison. */
    public $str2;

    /**
     * LCS length table of the most recent comparison.
     *
     * $c[$i][$j] is the LCS length of the first $i units of $str1 and the
     * first $j units of $str2. Row 0 and column 0 stand for the empty prefix,
     * so the table has (len1 + 1) x (len2 + 1) cells.
     *
     * @var array
     */
    public $c = array();

    /** @var array Comparison units (characters) of $str1. */
    private $chars1 = array();

    /** @var array Comparison units (characters) of $str2. */
    private $chars2 = array();

    /**
     * Returns the similarity of two strings as 2 * |LCS| / (|str1| + |str2|),
     * rounded to two decimals.
     *
     * Lengths are counted in UTF-8 characters, not bytes, so a multi-byte
     * character weighs the same as an ASCII one. If either string is not
     * valid UTF-8, both are compared byte by byte instead.
     *
     * @param string $str1
     * @param string $str2
     * @return float Value in [0, 1]; 1.0 when both strings are empty.
     */
    public function getSimilar($str1, $str2)
    {
        $this->prepare($str1, $str2);

        $len1 = count($this->chars1);
        $len2 = count($this->chars2);
        if ($len1 + $len2 === 0) {
            // Two empty strings are identical; this also avoids dividing by zero.
            return 1.0;
        }

        // The bottom-right cell holds the LCS length of the full strings.
        return round($this->c[$len1][$len2] * 2 / ($len1 + $len2), 2);
    }

    /**
     * Returns the longest common subsequence of the two strings.
     *
     * @param string $str1
     * @param string $str2
     * @return string Empty string when nothing is shared.
     */
    private function getLCS($str1, $str2)
    {
        $this->prepare($str1, $str2);

        return $this->printLCS($this->c, count($this->chars1), count($this->chars2));
    }

    /**
     * Stores both operands, splits them into comparison units and builds the
     * LCS length table for them.
     */
    private function prepare($str1, $str2)
    {
        $this->str1 = (string) $str1;
        $this->str2 = (string) $str2;

        $chars1 = self::splitUtf8($this->str1);
        $chars2 = self::splitUtf8($this->str2);
        if ($chars1 === false || $chars2 === false) {
            // Invalid UTF-8 somewhere: compare raw bytes on both sides so the
            // two unit lists stay comparable with each other.
            $chars1 = self::splitBytes($this->str1);
            $chars2 = self::splitBytes($this->str2);
        }

        $this->chars1 = $chars1;
        $this->chars2 = $chars2;
        $this->initC(count($chars1), count($chars2));
    }

    /**
     * Splits a string into UTF-8 characters.
     *
     * @return array|false List of characters, or false if $str is not valid UTF-8.
     */
    private static function splitUtf8($str)
    {
        if ($str === '') {
            return array();
        }

        return preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Splits a string into single bytes.
     *
     * @return array
     */
    private static function splitBytes($str)
    {
        // str_split('') yields [''] on older PHP versions, hence the guard.
        return $str === '' ? array() : str_split($str);
    }

    /**
     * Fills $this->c with the LCS lengths of every pair of prefixes.
     *
     * @param int $len1 Number of units in the first string.
     * @param int $len2 Number of units in the second string.
     */
    private function initC($len1, $len2)
    {
        // Start from a fresh table: row 0 / column 0 are the empty prefix.
        $table = array();
        for ($i = 0; $i <= $len1; $i++) {
            $table[$i] = array_fill(0, $len2 + 1, 0);
        }

        for ($i = 1; $i <= $len1; $i++) {
            for ($j = 1; $j <= $len2; $j++) {
                if ($this->chars1[$i - 1] === $this->chars2[$j - 1]) {
                    $table[$i][$j] = $table[$i - 1][$j - 1] + 1;
                } elseif ($table[$i - 1][$j] >= $table[$i][$j - 1]) {
                    $table[$i][$j] = $table[$i - 1][$j];
                } else {
                    $table[$i][$j] = $table[$i][$j - 1];
                }
            }
        }

        $this->c = $table;
    }

    /**
     * Walks the table back from cell ($i, $j) and returns the common
     * subsequence of the first $i and first $j units.
     *
     * Iterative rather than recursive so long inputs cannot exhaust the stack.
     *
     * @param array $c LCS length table built by initC().
     * @param int   $i Number of units of the first string to consider.
     * @param int   $j Number of units of the second string to consider.
     * @return string
     */
    private function printLCS($c, $i, $j)
    {
        $picked = array();
        while ($i > 0 && $j > 0) {
            if ($this->chars1[$i - 1] === $this->chars2[$j - 1]) {
                $picked[] = $this->chars2[$j - 1];
                $i--;
                $j--;
            } elseif ($c[$i - 1][$j] >= $c[$i][$j - 1]) {
                $i--;
            } else {
                $j--;
            }
        }

        // Units were collected back to front.
        return implode('', array_reverse($picked));
    }
}

$c = new LCS();
$t = $c->getSimilar('春秋航空:拟5000万元-1亿元回购股份', '春秋航空:拟5000万元-1亿元回购公司股份');
var_dump($t); // float(0.95): all 20 characters of the first string occur in the 22 of the second, 2 * 20 / 42