Skip to content

Commit a239f17

Browse files
committed
Optimize stripping empty/blank lines
To calculate the similarity ratio while empty/blank lines are ignored, those lines have to be stripped from the sequences beforehand. The stripped lines are restored after the calculation, so the class can still be used as a SequenceMatcher.
1 parent 75f5ce0 commit a239f17

File tree

1 file changed

+65
-35
lines changed

1 file changed

+65
-35
lines changed

lib/jblond/Diff/Similarity.php

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ class Similarity extends SequenceMatcher
3737
* @var array Count of each unique sequence at version 2.
3838
*/
3939
private $uniqueCount2;
40+
/**
41+
* @var array Lines stripped from the sequences by Similarity::stripLines(), grouped by version ('old' / 'new') and keyed by their original line index.
42+
* @see Similarity::stripLines()
43+
*/
44+
private $stripped = ['old' => [], 'new' => []];
4045

4146

4247
/**
@@ -65,15 +70,22 @@ public function setSeq2($version2)
6570
*/
6671
public function getSimilarity(int $type = self::CALC_DEFAULT): float
6772
{
73+
if ($this->options['ignoreLines']) {
74+
// Backup original sequences and filter non blank lines.
75+
$this->stripLines();
76+
}
77+
6878
switch ($type) {
6979
case self::CALC_FAST:
70-
return $this->getRatioFast();
80+
$ratio = $this->getRatioFast();
81+
$this->restoreLines();
82+
break;
7183
case self::CALC_FASTEST:
72-
return $this->getRatioFastest();
84+
$ratio = $this->getRatioFastest();
85+
$this->restoreLines();
86+
break;
7387
default:
74-
if ($this->options['ignoreLines']) {
75-
$this->stripLines();
76-
}
88+
$this->setSequences($this->old, $this->new);
7789
$matches = array_reduce(
7890
$this->getMatchingBlocks(),
7991
function ($carry, $item) {
@@ -82,8 +94,44 @@ function ($carry, $item) {
8294
0
8395
);
8496

85-
return $this->calculateRatio($matches, count($this->old) + count($this->new));
86-
// TODO: Restore original (un-stripped) versions?
97+
$ratio = $this->calculateRatio($matches, count($this->old) + count($this->new));
98+
$this->restoreLines();
99+
$this->setSequences($this->old, $this->new);
100+
}
101+
102+
return $ratio;
103+
}
104+
105+
/**
106+
* Strip empty or blank lines from the sequences to compare.
107+
*
108+
*/
109+
private function stripLines(): void
{
    // Whitespace-only lines count as empty only in "ignore blank lines" mode;
    // otherwise a line is dropped only when it is empty as-is.
    $trimBeforeCheck = $this->options['ignoreLines'] == self::DIFF_IGNORE_LINE_BLANK;

    foreach (['old', 'new'] as $version) {
        $keptLines = array_filter(
            $this->$version,
            function ($line, $index) use ($version, $trimBeforeCheck) {
                $candidate = $trimBeforeCheck ? trim($line) : $line;

                if ($candidate != '') {
                    // Line carries content: keep it in the sequence.
                    return true;
                }

                // Remember the untouched line under its original index so it
                // can be put back after the ratio has been calculated.
                $this->stripped[$version][$index] = $line;

                return false;
            },
            ARRAY_FILTER_USE_BOTH
        );

        // array_filter() preserves keys; re-index to get a gap-free list again.
        $this->$version = array_values($keptLines);
    }
}
89137

@@ -97,6 +145,7 @@ function ($carry, $item) {
97145
private function getRatioFast(): float
98146
{
99147
if ($this->uniqueCount2 === null) {
148+
// Build unless cached.
100149
$this->uniqueCount2 = [];
101150
$bLength = count($this->new);
102151
for ($iterator = 0; $iterator < $bLength; ++$iterator) {
@@ -140,6 +189,15 @@ private function calculateRatio(int $matches, int $length = 0): float
140189
return $returnValue;
141190
}
142191

192+
/**
 * Re-insert the lines recorded in $this->stripped into the 'old' and 'new'
 * sequences at the index each line was recorded under.
 *
 * The recorded lines are discarded once they have been re-inserted, so a later
 * call cannot splice stale entries into sequences that were replaced in the
 * meantime.
 */
private function restoreLines(): void
{
    foreach (['old', 'new'] as $version) {
        // Entries are inserted in the order they were recorded. This assumes
        // ascending index order, so that every earlier line is already back in
        // place and the recorded index is valid at the moment of insertion.
        foreach ($this->stripped[$version] as $index => $line) {
            // Wrap the line in an array: array_splice() casts a bare replacement
            // to an array, which would insert nothing for null and spread the
            // elements of an array value.
            array_splice($this->$version, $index, 0, [$line]);
        }

        // Forget the restored lines; they belong to this run only.
        $this->stripped[$version] = [];
    }
}
200+
143201
/**
144202
* Return an upper bound ratio really quickly for the similarity of the strings.
145203
*
@@ -155,34 +213,6 @@ private function getRatioFastest(): float
155213
return $this->calculateRatio(min($aLength, $bLength), $aLength + $bLength);
156214
}
157215

158-
/**
159-
* Strip empty or blank lines from the sequences to compare.
160-
*
161-
*/
162-
private function stripLines(): void
163-
{
164-
foreach (['old', 'new'] as $version) {
165-
if ($this->options['ignoreLines'] == self::DIFF_IGNORE_LINE_BLANK) {
166-
array_walk(
167-
$this->$version,
168-
function (&$line) {
169-
$line = trim($line);
170-
}
171-
);
172-
unset($line);
173-
}
174-
175-
$this->$version = array_filter(
176-
$this->$version,
177-
function ($line) {
178-
return $line != '';
179-
}
180-
);
181-
}
182-
183-
$this->setSequences(array_values($this->old), array_values($this->new));
184-
}
185-
186216
/**
187217
* Helper function to calculate the number of matches for Ratio().
188218
*

0 commit comments

Comments
 (0)