Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F117753489
HtmlDiff.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
20 KB
Referenced Files
None
Subscribers
None
HtmlDiff.php
View Options
<?php
namespace
Caxy\HtmlDiff
;
class
HtmlDiff
{
/**
 * Tag names registered as special-case tags when the constructor is not given a list.
 *
 * @var array
 */
public static $defaultSpecialCaseTags = array(
    'strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p'
);

/**
 * Characters the constructor installs as the special-case character list.
 *
 * @var array
 */
public static $defaultSpecialCaseChars = array('.', ',', '(', ')', "'");

/**
 * Value used for the grouping flag when the constructor receives null for it.
 *
 * @var boolean
 */
public static $defaultGroupDiffs = true;

/** @var string Diff markup accumulated while operations are processed. */
protected $content;

/** @var string Old HTML as stored by the constructor. */
protected $oldText;

/** @var string New HTML as stored by the constructor. */
protected $newText;

/** @var array Word list produced from $oldText by splitInputsToWords(). */
protected $oldWords = array();

/** @var array Word list produced from $newText by splitInputsToWords(). */
protected $newWords = array();

/** @var array Map of word => positions in $newWords, filled by indexNewWords(). */
protected $wordIndices;

/** @var string Encoding name handed to the constructor. */
protected $encoding;

/** @var array Regex patterns built by getOpeningTag() for each special-case tag. */
protected $specialCaseOpeningTags = array();

/** @var array Literal closing tags built by getClosingTag() for each special-case tag. */
protected $specialCaseClosingTags = array();

/** @var array Registered special-case tag names. */
protected $specialCaseTags;

/** @var array Characters stripped by isPartOfWord() before its alphanumeric test. */
protected $specialCaseChars;

/** @var boolean Grouping flag consulted by findMatch(). */
protected $groupDiffs;

/** @var boolean When true, a space separates the deleted and inserted halves of a replace. */
protected $insertSpaceInReplace = false;
/**
 * Stores the two HTML fragments to compare and initialises the diff settings.
 *
 * Both inputs are trimmed and passed through purifyHtml() before being stored.
 *
 * @param string       $oldText         Original HTML.
 * @param string       $newText         Revised HTML.
 * @param string       $encoding        Encoding name; stored on the instance as given.
 * @param array|null   $specialCaseTags Tag names to register as special-case tags;
 *                                      null selects static::$defaultSpecialCaseTags.
 * @param boolean|null $groupDiffs      Grouping flag; null selects static::$defaultGroupDiffs.
 */
public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
{
    $tags = ($specialCaseTags === null) ? static::$defaultSpecialCaseTags : $specialCaseTags;
    $grouping = ($groupDiffs === null) ? static::$defaultGroupDiffs : $groupDiffs;

    $this->oldText = $this->purifyHtml(trim($oldText));
    $this->newText = $this->purifyHtml(trim($newText));
    $this->encoding = $encoding;
    $this->content = '';
    $this->groupDiffs = $grouping;
    $this->setSpecialCaseTags($tags);
    $this->setSpecialCaseChars(static::$defaultSpecialCaseChars);
}
/**
 * Sets whether processReplaceOperation() puts a single space between the
 * deleted and the inserted part of a replacement.
 *
 * @param boolean $boolean
 * @return HtmlDiff This instance, so calls can be chained.
 */
public function setInsertSpaceInReplace($boolean)
{
    $this->insertSpaceInReplace = $boolean;

    return $this;
}
/**
 * Returns the flag set through setInsertSpaceInReplace() (false by default).
 *
 * @return boolean
 */
public function getInsertSpaceInReplace()
{
    return $this->insertSpaceInReplace;
}
/**
 * Replaces the whole special-case character list.
 *
 * @param array $chars Characters that isPartOfWord() ignores when testing a word.
 */
public function setSpecialCaseChars(array $chars)
{
    $this->specialCaseChars = $chars;
}
/**
 * Returns the current special-case character list.
 *
 * @return array
 */
public function getSpecialCaseChars()
{
    return $this->specialCaseChars;
}
/**
 * Appends a character to the special-case list unless it is already present.
 *
 * @param string $char
 */
public function addSpecialCaseChar($char)
{
    if (in_array($char, $this->specialCaseChars)) {
        return;
    }

    $this->specialCaseChars[] = $char;
}
/**
 * Removes a character from the special-case list; does nothing when it is absent.
 *
 * The remaining entries keep their existing array keys.
 *
 * @param string $char
 */
public function removeSpecialCaseChar($char)
{
    $position = array_search($char, $this->specialCaseChars);
    if ($position === false) {
        return;
    }

    unset($this->specialCaseChars[$position]);
}
/**
 * Replaces the list of special-case tags.
 *
 * The opening-tag patterns and closing-tag strings that insertTag() consults
 * are rebuilt from scratch, so tags registered by an earlier call (including
 * the constructor's) no longer linger after the list is replaced.
 *
 * @param array $tags Tag names without angle brackets, e.g. array('b', 'em').
 */
public function setSpecialCaseTags(array $tags = array())
{
    $this->specialCaseTags = $tags;
    // Drop patterns derived from the previous list before re-registering.
    $this->specialCaseOpeningTags = array();
    $this->specialCaseClosingTags = array();

    foreach ($this->specialCaseTags as $tag) {
        $this->addSpecialCaseTag($tag);
    }
}
/**
 * Registers a tag name as a special-case tag.
 *
 * The name is appended to the tag list, its opening pattern to the
 * opening-tag list and its closing tag to the closing-tag list; each of
 * the three is only appended when not already present.
 *
 * @param string $tag Tag name without angle brackets.
 */
public function addSpecialCaseTag($tag)
{
    if (!in_array($tag, $this->specialCaseTags)) {
        $this->specialCaseTags[] = $tag;
    }

    $openingPattern = $this->getOpeningTag($tag);
    $closingTag = $this->getClosingTag($tag);

    $openingKnown = in_array($openingPattern, $this->specialCaseOpeningTags);
    if (!$openingKnown) {
        $this->specialCaseOpeningTags[] = $openingPattern;
    }

    $closingKnown = in_array($closingTag, $this->specialCaseClosingTags);
    if (!$closingKnown) {
        $this->specialCaseClosingTags[] = $closingTag;
    }
}
/**
 * Unregisters a special-case tag.
 *
 * When the tag name is in the tag list it is removed, together with its
 * opening pattern and closing tag if those are present in their lists.
 * Nothing happens for a tag name that is not registered.
 *
 * @param string $tag Tag name without angle brackets.
 */
public function removeSpecialCaseTag($tag)
{
    $tagKey = array_search($tag, $this->specialCaseTags);
    if ($tagKey === false) {
        return;
    }
    unset($this->specialCaseTags[$tagKey]);

    $openingKey = array_search($this->getOpeningTag($tag), $this->specialCaseOpeningTags);
    if ($openingKey !== false) {
        unset($this->specialCaseOpeningTags[$openingKey]);
    }

    $closingKey = array_search($this->getClosingTag($tag), $this->specialCaseClosingTags);
    if ($closingKey !== false) {
        unset($this->specialCaseClosingTags[$closingKey]);
    }
}
/**
 * Returns the registered special-case tag names.
 *
 * @return array
 */
public function getSpecialCaseTags()
{
    return $this->specialCaseTags;
}
/**
 * Returns the old HTML as stored by the constructor (trimmed and purified).
 *
 * @return string
 */
public function getOldHtml()
{
    return $this->oldText;
}
/**
 * Returns the new HTML as stored by the constructor (trimmed and purified).
 *
 * @return string
 */
public function getNewHtml()
{
    return $this->newText;
}
/**
 * Returns the diff markup accumulated so far; an empty string until
 * operations have been processed.
 *
 * @return string
 */
public function getDifference()
{
    return $this->content;
}
/**
 * Sets the grouping flag that findMatch() consults when deciding whether a
 * whitespace-only match should be discarded.
 *
 * @param boolean $boolean
 */
public function setGroupDiffs($boolean)
{
    $this->groupDiffs = $boolean;
}
/**
 * Returns the grouping flag.
 *
 * @return boolean
 */
public function isGroupDiffs()
{
    return $this->groupDiffs;
}
/**
 * Builds the case-insensitive regular expression that recognises an opening
 * tag with the given name.
 *
 * The tag name must be followed by whitespace, "/", ">" or the end of the
 * string, so "b" matches "<b>" and "<b class='x'>" but not "<br>" or
 * "<blockquote>". The name is passed through preg_quote() so that regex
 * metacharacters in it are matched literally.
 *
 * @param string $tag Tag name without angle brackets.
 * @return string PCRE pattern including delimiters and modifiers.
 */
protected function getOpeningTag($tag)
{
    return '/<' . preg_quote($tag, '/') . '(?=[\s\/>]|$)[^>]*/i';
}
/**
 * Builds the literal closing tag for a tag name, e.g. "b" gives "</b>".
 *
 * @param string $tag Tag name without angle brackets.
 * @return string
 */
protected function getClosingTag($tag)
{
    return "</{$tag}>";
}
/**
 * Returns the text between the first occurrence of $start and the last
 * occurrence of $end that follows it.
 *
 * @param string $str   Text to search.
 * @param string $start Opening delimiter.
 * @param string $end   Closing delimiter.
 * @return string The enclosed text, or '' when either delimiter is missing.
 */
protected function getStringBetween($str, $start, $end)
{
    $aroundStart = explode($start, $str, 2);
    if (count($aroundStart) <= 1) {
        return '';
    }

    $segments = explode($end, $aroundStart[1]);
    if (count($segments) <= 1) {
        return '';
    }

    // Discard whatever follows the last $end, then restore inner $end occurrences.
    array_pop($segments);

    return implode($end, $segments);
}
/**
 * Hook for cleaning up the input HTML before it is diffed.
 *
 * The Tidy-based clean-up is switched off by the literal "&& false" in the
 * condition, so the input is currently returned unchanged. The disabled
 * branch is kept, with two latent errors repaired so it works if it is ever
 * re-enabled: the Tidy class is referenced from the global namespace, and
 * getStringBetween() receives its required closing delimiter.
 *
 * @param string     $html HTML fragment.
 * @param array|null $tags Not used by this implementation.
 * @return string
 */
protected function purifyHtml($html, $tags = null)
{
    if (class_exists('Tidy') && false) {
        $config = array('output-xhtml' => true, 'indent' => false);
        $tidy = new \tidy();
        $tidy->parseString($html, $config, 'utf8');
        $html = (string) $tidy;

        return $this->getStringBetween($html, '<body>', '</body>');
    }

    return $html;
}
/**
 * Runs the diff: splits both inputs into words, indexes the new words,
 * computes the list of operations and processes each one in order.
 *
 * @return string The diff markup accumulated in $content.
 */
public function build()
{
    $this->splitInputsToWords();
    $this->indexNewWords();

    foreach ($this->operations() as $operation) {
        $this->performOperation($operation);
    }

    return $this->content;
}
/**
 * Rebuilds $wordIndices, a map from each word of $newWords to the list of
 * positions at which it occurs.
 *
 * Words that isTag() recognises are indexed under their attribute-less form
 * (see stripTagAttributes()).
 */
protected function indexNewWords()
{
    $this->wordIndices = array();

    foreach ($this->newWords as $position => $word) {
        $key = $this->isTag($word) ? $this->stripTagAttributes($word) : $word;
        // Appending through [] creates the position list on first use.
        $this->wordIndices[$key][] = $position;
    }
}
/**
 * Fills $oldWords and $newWords by splitting the stored old and new HTML
 * into characters and grouping those characters into words.
 */
protected function splitInputsToWords()
{
    $oldCharacters = $this->explode($this->oldText);
    $this->oldWords = $this->convertHtmlToListOfWords($oldCharacters);

    $newCharacters = $this->explode($this->newText);
    $this->newWords = $this->convertHtmlToListOfWords($newCharacters);
}
/**
 * Tells whether the text is alphanumeric once every special-case character
 * has been removed from it.
 *
 * @param string $text
 * @return boolean Result of ctype_alnum() on the stripped text.
 */
protected function isPartOfWord($text)
{
    $withoutSpecialChars = str_replace($this->specialCaseChars, '', $text);

    return ctype_alnum($withoutSpecialChars);
}
/**
 * Groups a list of single characters into "words": whole tags, runs of
 * whitespace, and runs of word characters.
 *
 * The scanner is in one of three states:
 *  - 'tag':        collecting characters up to and including the closing ">";
 *  - 'whitespace': collecting whitespace characters;
 *  - 'character':  collecting word characters. An alphanumeric character
 *                  extends the current word while that word is empty or
 *                  passes isPartOfWord(); a special-case character extends
 *                  it when the following character passes isPartOfWord().
 *                  Any other character closes the current word and starts a
 *                  new one.
 *
 * @param array $characterString List of characters, as produced by explode().
 * @return array List of words in input order.
 */
protected function convertHtmlToListOfWords($characterString)
{
    // With "[" and "]" acting as regex delimiters this pattern is "^\s",
    // i.e. it matches a value that begins with whitespace.
    $whitespacePattern = '[^\s]';

    $state = 'character';
    $buffer = '';
    $words = array();

    foreach ($characterString as $i => $character) {
        if ($state == 'tag') {
            if (!$this->isEndOfTag($character)) {
                $buffer .= $character;
                continue;
            }

            $buffer .= '>';
            $words[] = $buffer;
            $buffer = '';
            $state = preg_match($whitespacePattern, $character) ? 'character' : 'whitespace';
            continue;
        }

        if ($state != 'character' && $state != 'whitespace') {
            continue;
        }

        // Outside a tag, "<" always closes the pending word and opens a tag.
        if ($this->isStartOfTag($character)) {
            if ($buffer != '') {
                $words[] = $buffer;
            }
            $buffer = '<';
            $state = 'tag';
            continue;
        }

        if ($state == 'whitespace') {
            if (preg_match($whitespacePattern, $character)) {
                $buffer .= $character;
                continue;
            }

            if ($buffer != '') {
                $words[] = $buffer;
            }
            $buffer = $character;
            $state = 'character';
            continue;
        }

        // From here on the scanner is in the 'character' state.
        if (preg_match($whitespacePattern, $character) > 0) {
            if ($buffer != '') {
                $words[] = $buffer;
            }
            $buffer = $character;
            $state = 'whitespace';
            continue;
        }

        $extendsWord = ctype_alnum($character)
            && (strlen($buffer) == 0 || $this->isPartOfWord($buffer));

        if (!$extendsWord) {
            $extendsWord = in_array($character, $this->specialCaseChars)
                && isset($characterString[$i + 1])
                && $this->isPartOfWord($characterString[$i + 1]);
        }

        if ($extendsWord) {
            $buffer .= $character;
        } else {
            $words[] = $buffer;
            $buffer = $character;
        }
    }

    if ($buffer != '') {
        $words[] = $buffer;
    }

    return $words;
}
/**
 * Tells whether the value is the character that opens a tag ("<").
 *
 * @param string $val
 * @return boolean
 */
protected function isStartOfTag($val)
{
    return '<' == $val;
}
/**
 * Tells whether the value is the character that closes a tag (">").
 *
 * @param string $val
 * @return boolean
 */
protected function isEndOfTag($val)
{
    return '>' == $val;
}
/**
 * Tells whether the value begins with a whitespace character.
 *
 * The previous implementation negated a match against '[^\s]'. PCRE treats
 * the square brackets there as delimiters, so the pattern is really "^\s",
 * and the negation made the method answer true for non-whitespace input
 * and false for whitespace. The negation is removed so the result agrees
 * with the method name.
 *
 * NOTE(review): nothing in this class calls this method; confirm no subclass
 * relies on the old inverted result.
 *
 * @param string $value
 * @return boolean
 */
protected function isWhiteSpace($value)
{
    return preg_match('/^\s/', $value) === 1;
}
/**
 * Splits a string into a list of characters.
 *
 * The split is UTF-8 aware (as suggested by @onassar). PREG_SPLIT_NO_EMPTY
 * drops the empty strings an empty pattern otherwise yields before the
 * first and after the last character. preg_split() returns false when the
 * input is not valid UTF-8; in that case the string is split into bytes so
 * callers always receive an array.
 *
 * @param string $value
 * @return array List of characters; empty for an empty string.
 */
protected function explode($value)
{
    $characters = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
    if ($characters !== false) {
        return $characters;
    }

    // Invalid UTF-8: fall back to a byte-wise split rather than returning false.
    $bytes = (string) $value;

    return strlen($bytes) > 0 ? str_split($bytes) : array();
}
/**
 * Dispatches one operation to the handler for its action.
 *
 * 'equal', 'delete', 'insert' and 'replace' are handled; deletions use the
 * CSS class "diffdel" and insertions "diffins". Any other action is ignored.
 *
 * @param object $operation Object exposing an "action" property plus the
 *                          range properties read by the handlers.
 */
protected function performOperation($operation)
{
    $action = $operation->action;

    if ($action == 'equal') {
        $this->processEqualOperation($operation);
    } elseif ($action == 'delete') {
        $this->processDeleteOperation($operation, 'diffdel');
    } elseif ($action == 'insert') {
        $this->processInsertOperation($operation, 'diffins');
    } elseif ($action == 'replace') {
        $this->processReplaceOperation($operation);
    }
}
/**
 * Emits a replacement as a deletion followed by an insertion, both using
 * the CSS class "diffmod".
 *
 * The deletion is emitted only when the old text is non-empty and the
 * insertion only when the new text is non-empty. When both are emitted and
 * the insert-space flag is set, a single space is written between them.
 *
 * @param object $operation Operation describing the replaced ranges.
 */
protected function processReplaceOperation($operation)
{
    $hasOldText = strlen($this->oldText) > 0;
    $hasNewText = strlen($this->newText) > 0;

    if ($hasOldText) {
        $this->processDeleteOperation($operation, 'diffmod');
    }

    if ($hasOldText && $hasNewText && $this->insertSpaceInReplace) {
        $this->content .= ' ';
    }

    if ($hasNewText) {
        $this->processInsertOperation($operation, 'diffmod');
    }
}
/**
 * Emits the new words covered by the operation as an insertion.
 *
 * Words of $newWords whose position lies in [startInNew, endInNew) are
 * collected and handed to insertTag() with the "ins" tag.
 *
 * @param object $operation Object exposing startInNew and endInNew.
 * @param string $cssClass  CSS class for the generated <ins> element.
 */
protected function processInsertOperation($operation, $cssClass)
{
    $inserted = array();

    foreach ($this->newWords as $position => $word) {
        if ($position < $operation->startInNew || $position >= $operation->endInNew) {
            continue;
        }
        $inserted[] = $word;
    }

    $this->insertTag('ins', $cssClass, $inserted);
}
/**
 * Emits the old words covered by the operation as a deletion.
 *
 * Words of $oldWords whose position lies in [startInOld, endInOld) are
 * collected and handed to insertTag() with the "del" tag.
 *
 * @param object $operation Object exposing startInOld and endInOld.
 * @param string $cssClass  CSS class for the generated <del> element.
 */
protected function processDeleteOperation($operation, $cssClass)
{
    $deleted = array();

    foreach ($this->oldWords as $position => $word) {
        if ($position < $operation->startInOld || $position >= $operation->endInOld) {
            continue;
        }
        $deleted[] = $word;
    }

    $this->insertTag('del', $cssClass, $deleted);
}
/**
 * Appends the unchanged words covered by the operation to the output.
 *
 * Words of $newWords whose position lies in [startInNew, endInNew) are
 * concatenated without any wrapping markup.
 *
 * @param object $operation Object exposing startInNew and endInNew.
 */
protected function processEqualOperation($operation)
{
    $unchanged = '';

    foreach ($this->newWords as $position => $word) {
        if ($position < $operation->startInNew || $position >= $operation->endInNew) {
            continue;
        }
        $unchanged .= $word;
    }

    $this->content .= $unchanged;
}
/**
 * Writes a list of inserted or deleted words to the output.
 *
 * The list is consumed from the front, alternating between two steps:
 *  1. a leading run of non-tag words is wrapped in <$tag class="$cssClass">;
 *  2. the following run of tags is copied through unwrapped.
 *
 * When a tag run starts right at the front of the list, its first tag is
 * checked against the special-case lists. A special-case opening tag causes
 * '<ins class="mod">' to be written after the tag run; a special-case
 * closing tag causes '</ins>' to be written before it. For deletions
 * ($tag == "del") that first tag itself is dropped. Otherwise, if the tag
 * run begins with an opening tag, "diffmod" is added to its class attribute.
 *
 * Fixes relative to the previous implementation:
 *  - the emptiness check on the injection string no longer calls count() on
 *    a string (a warning on PHP 7.2+, a TypeError on PHP 8);
 *  - the dropped tag is removed with array_shift(), which keeps the list
 *    contiguous; unset() left a gap that made the next extraction discard
 *    one word too many;
 *  - a single-quoted class attribute keeps its single quotes instead of
 *    being rewritten with a mismatched opening double quote.
 *
 * @param string $tag      "ins" or "del".
 * @param string $cssClass CSS class for the wrapping element.
 * @param array  $words    Words to emit; emptied as they are consumed.
 */
protected function insertTag($tag, $cssClass, &$words)
{
    while (count($words) != 0) {
        $nonTags = $this->extractConsecutiveWords($words, 'noTag');
        $specialCaseTagInjection = '';
        $specialCaseTagInjectionIsBefore = false;

        if (count($nonTags) != 0) {
            $this->content .= $this->wrapText(implode('', $nonTags), $tag, $cssClass);
        } else {
            // No leading text, so $words[0] is a tag: test it against the special cases.
            $isSpecialOpening = false;
            foreach ($this->specialCaseOpeningTags as $pattern) {
                if (preg_match($pattern, $words[0])) {
                    $isSpecialOpening = true;
                    break;
                }
            }

            if ($isSpecialOpening) {
                $specialCaseTagInjection = '<ins class="mod">';
                if ($tag == 'del') {
                    array_shift($words);
                }
            } elseif (array_search($words[0], $this->specialCaseClosingTags) !== false) {
                $specialCaseTagInjection = '</ins>';
                $specialCaseTagInjectionIsBefore = true;
                if ($tag == 'del') {
                    array_shift($words);
                }
            }
        }

        if (count($words) == 0 && $specialCaseTagInjection === '') {
            break;
        }

        $tagRun = $this->extractConsecutiveWords($words, 'tag');

        if ($specialCaseTagInjectionIsBefore) {
            $this->content .= $specialCaseTagInjection . implode('', $tagRun);
            continue;
        }

        if (isset($tagRun[0]) && $this->isOpeningTag($tagRun[0]) && !$this->isClosingTag($tagRun[0])) {
            if (strpos($tagRun[0], 'class=') !== false) {
                // Prepend "diffmod" inside the existing attribute, keeping its quote style.
                $tagRun[0] = str_replace('class="', 'class="diffmod ', $tagRun[0]);
                $tagRun[0] = str_replace("class='", "class='diffmod ", $tagRun[0]);
            } else {
                $tagRun[0] = str_replace('>', ' class="diffmod">', $tagRun[0]);
            }
        }

        $this->content .= implode('', $tagRun) . $specialCaseTagInjection;
    }
}
/**
 * Tests a word against a condition name.
 *
 * @param string $word
 * @param string $condition 'tag' to test that the word is a tag; any other
 *                          value tests that it is not.
 * @return boolean
 */
protected function checkCondition($word, $condition)
{
    $wordIsTag = $this->isTag($word);

    if ($condition == 'tag') {
        return $wordIsTag;
    }

    return !$wordIsTag;
}
/**
 * Wraps text in an element carrying a CSS class, e.g.
 * wrapText('x', 'ins', 'diffins') gives '<ins class="diffins">x</ins>'.
 *
 * @param string $text     Content, inserted as-is.
 * @param string $tagName  Element name.
 * @param string $cssClass Value of the class attribute.
 * @return string
 */
protected function wrapText($text, $tagName, $cssClass)
{
    $open = '<' . $tagName . ' class="' . $cssClass . '">';
    $close = '</' . $tagName . '>';

    return $open . $text . $close;
}
/**
 * Removes the leading run of words that satisfy the condition from $words
 * and returns it.
 *
 * $words is re-indexed first. The previous implementation located the end
 * of the run by array key but removed elements by position, so a list with
 * a gap in its keys (for example after unset($words[0])) lost a word that
 * was not part of the returned run.
 *
 * @param array  $words     Word list, modified in place; on return it holds
 *                          the remaining words with keys starting at 0.
 * @param string $condition Condition name understood by checkCondition().
 * @return array The extracted words, in order; empty when the first word
 *               does not satisfy the condition.
 */
protected function extractConsecutiveWords(&$words, $condition)
{
    $words = array_values($words);

    // Length of the leading run; the whole list when every word qualifies.
    $runLength = count($words);
    foreach ($words as $position => $word) {
        if (!$this->checkCondition($word, $condition)) {
            $runLength = $position;
            break;
        }
    }

    return array_splice($words, 0, $runLength);
}
/**
 * Tells whether the item is recognised as an opening or a closing tag.
 *
 * @param string $item
 * @return boolean
 */
protected function isTag($item)
{
    if ($this->isOpeningTag($item)) {
        return true;
    }

    return (bool) $this->isClosingTag($item);
}
/**
 * Matches the item against the pattern for "<...>" followed by optional
 * whitespace. The pattern is not anchored and also matches closing tags.
 *
 * @param string $item
 * @return int|false Result of preg_match(): 1 on a match, 0 otherwise,
 *                   false on a regex error.
 */
protected function isOpeningTag($item)
{
    $pattern = '#<[^>]+>\s*#iU';

    return preg_match($pattern, $item);
}
/**
 * Matches the item against the pattern for "</...>" followed by optional
 * whitespace. The pattern is not anchored.
 *
 * @param string $item
 * @return int|false Result of preg_match(): 1 on a match, 0 otherwise,
 *                   false on a regex error.
 */
protected function isClosingTag($item)
{
    $pattern = '#</[^>]+>\s*#iU';

    return preg_match($pattern, $item);
}
/**
 * Turns the list of matching blocks into the ordered list of operations
 * that transforms the old words into the new words.
 *
 * A zero-length sentinel match positioned at the end of both word lists is
 * appended so that trailing differences produce an operation too. For each
 * match, the gap between the current positions and the start of the match
 * becomes a 'replace' (gap in both lists), 'insert' (gap in the new list
 * only) or 'delete' (gap in the old list only) operation, followed by an
 * 'equal' operation for the match itself.
 *
 * @return array List of Operation objects.
 */
protected function operations()
{
    $positionInOld = 0;
    $positionInNew = 0;
    $operations = array();

    $matches = $this->matchingBlocks();
    $matches[] = new Match(count($this->oldWords), count($this->newWords), 0);

    foreach ($matches as $match) {
        $oldAligned = ($positionInOld == $match->startInOld);
        $newAligned = ($positionInNew == $match->startInNew);

        if (!$oldAligned && !$newAligned) {
            $action = 'replace';
        } elseif ($oldAligned && !$newAligned) {
            $action = 'insert';
        } elseif (!$oldAligned && $newAligned) {
            $action = 'delete';
        } else {
            // Both positions already sit at the start of the match: no gap to describe.
            $action = 'none';
        }

        if ($action != 'none') {
            $operations[] = new Operation(
                $action,
                $positionInOld,
                $match->startInOld,
                $positionInNew,
                $match->startInNew
            );
        }

        // count() on an object that does not implement Countable evaluates to 1
        // but raises a warning on PHP 7.2+; use 1 directly in that case so the
        // result is unchanged without the warning.
        // NOTE(review): Match is declared outside this file; confirm whether it
        // implements Countable.
        $matchLength = ($match instanceof \Countable) ? count($match) : 1;

        if ($matchLength != 0) {
            $operations[] = new Operation(
                'equal',
                $match->startInOld,
                $match->endInOld(),
                $match->startInNew,
                $match->endInNew()
            );
        }

        $positionInOld = $match->endInOld();
        $positionInNew = $match->endInNew();
    }

    return $operations;
}
/**
 * Collects the matching blocks between the complete old and new word lists.
 *
 * @return array Blocks in the order findMatchingBlocks() appended them.
 */
protected function matchingBlocks()
{
    $blocks = array();
    $oldCount = count($this->oldWords);
    $newCount = count($this->newWords);

    $this->findMatchingBlocks(0, $oldCount, 0, $newCount, $blocks);

    return $blocks;
}
/**
 * Recursively collects matching blocks inside the given ranges.
 *
 * The best match for the ranges is looked up with findMatch(). When one is
 * found, the region before it is searched first (only if it is non-empty in
 * both lists), then the match is appended, then the region after it is
 * searched (again only if non-empty in both lists).
 *
 * @param int   $startInOld     First old position (inclusive).
 * @param int   $endInOld       Last old position (exclusive).
 * @param int   $startInNew     First new position (inclusive).
 * @param int   $endInNew       Last new position (exclusive).
 * @param array $matchingBlocks Result list, appended to in place.
 */
protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks)
{
    $match = $this->findMatch($startInOld, $endInOld, $startInNew, $endInNew);
    if ($match === null) {
        return;
    }

    $hasRegionBefore = $startInOld < $match->startInOld && $startInNew < $match->startInNew;
    if ($hasRegionBefore) {
        $this->findMatchingBlocks(
            $startInOld,
            $match->startInOld,
            $startInNew,
            $match->startInNew,
            $matchingBlocks
        );
    }

    $matchingBlocks[] = $match;

    $hasRegionAfter = $match->endInOld() < $endInOld && $match->endInNew() < $endInNew;
    if ($hasRegionAfter) {
        $this->findMatchingBlocks(
            $match->endInOld(),
            $endInOld,
            $match->endInNew(),
            $endInNew,
            $matchingBlocks
        );
    }
}
/**
 * Reduces a tag to its name, e.g. '<b class="x">' becomes '<b>'.
 *
 * Leading and trailing "<" and ">" characters are trimmed, everything from
 * the first space onwards is dropped, and the remainder is wrapped in
 * angle brackets again.
 *
 * @param string $word Tag including its angle brackets.
 * @return string
 */
protected function stripTagAttributes($word)
{
    $inner = trim($word, '<>');
    $firstSpace = strpos($inner, ' ');
    $name = ($firstSpace === false) ? $inner : substr($inner, 0, $firstSpace);

    return '<' . $name . '>';
}
/**
 * Finds the longest run of consecutive words common to the given ranges of
 * the old and the new word list.
 *
 * For each old word, the positions of the same word in the new list are
 * taken from $wordIndices (tags are compared by their attribute-less form).
 * $previousRow holds, per new position, the length of the common run ending
 * at the previous old word; a run is extended when the preceding new
 * position was part of one.
 *
 * @param int $startInOld First old position (inclusive).
 * @param int $endInOld   Last old position (exclusive).
 * @param int $startInNew First new position (inclusive).
 * @param int $endInNew   Last new position (exclusive).
 * @return Match|null The best match, or null when there is none or when
 *                    grouping is enabled and the match is whitespace only.
 */
protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew)
{
    $bestOld = $startInOld;
    $bestNew = $startInNew;
    $bestSize = 0;
    $previousRow = array();

    for ($oldPos = $startInOld; $oldPos < $endInOld; $oldPos++) {
        $currentRow = array();

        $key = $this->oldWords[$oldPos];
        if ($this->isTag($key)) {
            $key = $this->stripTagAttributes($key);
        }

        if (isset($this->wordIndices[$key])) {
            foreach ($this->wordIndices[$key] as $newPos) {
                if ($newPos < $startInNew) {
                    continue;
                }
                if ($newPos >= $endInNew) {
                    break;
                }

                $runLength = 1;
                if (isset($previousRow[$newPos - 1])) {
                    $runLength += $previousRow[$newPos - 1];
                }
                $currentRow[$newPos] = $runLength;

                if ($runLength > $bestSize) {
                    $bestSize = $runLength;
                    $bestOld = $oldPos - $runLength + 1;
                    $bestNew = $newPos - $runLength + 1;
                }
            }
        }

        $previousRow = $currentRow;
    }

    if ($bestSize == 0) {
        return null;
    }

    // Skip the match when grouping is on and it consists only of whitespace.
    if ($this->isGroupDiffs()) {
        $matchedText = implode('', array_slice($this->oldWords, $bestOld, $bestSize));
        if (preg_match('/^\s+$/', $matchedText)) {
            return null;
        }
    }

    return new Match($bestOld, $bestNew, $bestSize);
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, Apr 4, 5:34 AM (4 d, 23 h ago)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
39/2f/51c8ecfa422d8290d67e38bea655
Default Alt Text
HtmlDiff.php (20 KB)
Attached To
Mode
rRPK roundcubemail-plugins-kolab
Attached
Detach File
Event Timeline