Complex datatypes
PHP - process serialised data
PHP data structures can be transformed into a storable representation using the serialize()
function.
A PHP array can be serialised in order to store it in the database, for example.
$array = [
'key1' => 'value1',
'key2' => 'value2',
];
echo serialize($array);
// output: a:2:{s:4:"key1";s:6:"value1";s:4:"key2";s:6:"value2";}
This data structure can later be converted back into a PHP data structure with the function unserialize()
.
A description of the data format can be found in the PHP source code and partly in the PHP documentation.
While working with simple data structures like strings or arrays would be quite easy for pseudify, working with serialised PHP objects becomes more difficult.
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
echo serialize(new SimpleObject('baz1', 'baz2', 'baz3'));
// output: O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"\x00Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject\x00privateMember";s:4:"baz1";s:18:"*protectedMember";s:4:"baz2";s:12:"publicMember";s:4:"baz3";}`
A serialised PHP object O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"\x00Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject\x00privateMember";s:4:"baz1";s:18:"*protectedMember";s:4:"baz2";s:12:"publicMember";s:4:"baz3";}
can only be deserialised by PHP if the PHP autoloader has access to the corresponding source code files in which the object is defined.
However, Pseudify must be executable without the source code of any applications.
How can we now specifically pseudonymise e.g. the value of the property publicMember
(baz3
) without having to apply error-prone search-and-replace strategies to the text (e.g. with wild regular expressions)?
For this use case, pseudify offers you the SerializedEncoder
!
With the SerializedEncoder
it is possible to convert a serialised data structure into an AST, manipulate it and then write the AST back into a serialised data structure.
Let's look at a few examples
Integer
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = 1;
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: i:1;
serialized data AST:
IntegerNode {
-content: 1
}
Float
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = 1.1;
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: d:1.1000000000000001;
serialized data AST:
FloatNode {
-content: 1.1
}
Boolean
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = true;
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: b:1;
serialized data AST:
BooleanNode {
-content: true
}
NULL
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = null;
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: N;
serialized data AST:
NullNode {
}
String
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = 'How nice is this!';
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: s:17:"How nice is this!";
serialized data AST:
StringNode {
-content: "How nice is this!"
}
Arrays
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = ['how', 'nice'];
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: a:2:{i:0;s:3:"how";i:1;s:4:"nice";}
serialized data AST:
ArrayNode {
-properties: array:2 [
0 => ArrayElementNode {
-content: StringNode {
-content: "how"
}
-key: IntegerNode {
-content: 0
}
}
1 => ArrayElementNode {
-content: StringNode {
#parentNode: ArrayElementNode {}
-content: "nice"
}
-key: IntegerNode {
-content: 1
}
}
]
}
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = ['key1' => 'how', 'key2' => 'nice'];
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: a:2:{s:4:"key1";s:3:"how";s:4:"key2";s:4:"nice";}
serialized data AST:
ArrayNode {
-properties: array:2 [
"key1" => ArrayElementNode {
-content: StringNode {
-content: "how"
}
-key: StringNode {
-content: "key1"
}
}
"key2" => ArrayElementNode {
-content: StringNode {
-content: "nice"
}
-key: StringNode {
-content: "key2"
}
}
]
}
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = ['key1' => 'how', 'nice', null, 99 => 123];
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: a:4:{s:4:"key1";s:3:"how";i:0;s:4:"nice";i:1;N;i:99;i:123;}
serialized data AST:
ArrayNode {
-properties: array:4 [
"key1" => ArrayElementNode {
-content: StringNode {
-content: "how"
}
-key: StringNode {
-content: "key1"
}
}
0 => ArrayElementNode {
-content: StringNode {
-content: "nice"
}
-key: IntegerNode {
-content: 0
}
}
1 => ArrayElementNode {
-content: NullNode {
}
-key: IntegerNode {
-content: 1
}
}
99 => ArrayElementNode {
-content: IntegerNode {
-content: 123
}
-key: IntegerNode {
-content: 99
}
}
]
}
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = ['key1' => 'how', 'nice', 'key2' => ['is', 'this']];
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
ArrayNode {
-properties: array:3 [
"key1" => ArrayElementNode {
-content: StringNode {
-content: "how"
}
-key: StringNode {
-content: "key1"
}
}
0 => ArrayElementNode {
-content: StringNode {
-content: "nice"
}
-key: IntegerNode {
-content: 0
}
}
"key2" => ArrayElementNode {
-content: ArrayNode {
-properties: array:2 [
0 => ArrayElementNode {
-content: StringNode {
-content: "is"
}
-key: IntegerNode {
-content: 0
}
}
1 => ArrayElementNode {
-content: StringNode {
-content: "this"
}
-key: IntegerNode {
-content: 1
}
}
]
}
-key: StringNode {
-content: "key2"
}
}
]
}
Objects
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = new \Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject('baz1', 'baz2', 'baz3');
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObjectprivateMember";s:4:"baz1";s:18:"*protectedMember";s:4:"baz2";s:12:"publicMember";s:4:"baz3";}
serialized data AST:
ObjectNode {
-properties: array:3 [
"privateMember" => AttributeNode {
-content: StringNode {
-content: "baz1"
}
-propertyName: "privateMember"
-scope: "private"
-className: "Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject"
}
"protectedMember" => AttributeNode {
-content: StringNode {
-content: "baz2"
}
-propertyName: "protectedMember"
-scope: "protected"
-className: "*"
}
"publicMember" => AttributeNode {
-content: StringNode {
-content: "baz3"
}
-propertyName: "publicMember"
-scope: "public"
-className: null
}
]
-className: "Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject"
}
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$data = new \Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject(null, null, ['key1' => 'value1']);
$serializedData = serialize($data);
$encoder = new SerializedEncoder();
$serializedDataAST = $encoder->decode(data: $serializedData);
echo 'serialized data: ' . $serializedData . PHP_EOL . PHP_EOL;
echo 'serialized data AST:' . PHP_EOL . PHP_EOL;
dump($serializedDataAST);
serialized data: O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObjectprivateMember";N;s:18:"*protectedMember";N;s:12:"publicMember";a:1:{s:4:"key1";s:6:"value1";}}
serialized data AST:
ObjectNode {
-properties: array:3 [
"privateMember" => AttributeNode {
-content: NullNode {
}
-propertyName: "privateMember"
-scope: "private"
-className: "Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject"
}
"protectedMember" => AttributeNode {
-content: NullNode {
}
-propertyName: "protectedMember"
-scope: "protected"
-className: "*"
}
"publicMember" => AttributeNode {
-content: ArrayNode {
-properties: array:1 [
"key1" => ArrayElementNode {
-content: StringNode {
-content: "value1"
}
-key: StringNode {
-content: "key1"
}
}
]
}
-propertyName: "publicMember"
-scope: "public"
-className: null
}
]
-className: "Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject"
}
Create / manipulate data
Info
You can view the available methods of the individual node implementations in the repository.
Scalar values
Creating scalar values is easy. Only a new instance of the corresponding data type has to be created.
Integer
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = 1;
$node = $encoder->decode(data: serialize($data));
echo 'get the node value: ' . var_export($node->getValue(), true) . PHP_EOL;
get the node value: 1
Float
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = 123.321;
$node = $encoder->decode(data: serialize($data));
echo 'get the node value: ' . var_export($node->getValue(), true) . PHP_EOL;
get the node value: 123.321
Boolean
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = true;
$node = $encoder->decode(data: serialize($data));
echo 'get the node value: ' . var_export($node->getValue(), true) . PHP_EOL;
get the node value: true
Null
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = null;
$node = $encoder->decode(data: serialize($data));
echo 'get the node value: ' . var_export($node->getValue(), true) . PHP_EOL;
get the node value: NULL
String
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = 'how nice';
$node = $encoder->decode(data: serialize($data));
echo 'get the node value: ' . var_export($node->getValue(), true) . PHP_EOL;
get the node value: 'how nice'
Arrays
The array looks like this:
[
0 => 'value1',
'key2' => 'value2',
'key3' => [
0 => 'value3',
'key4' => 'value4'
]
];
Get the node value for array key 0
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = [0 => 'value1', 'key2' => 'value2', 'key3' => [0 => 'value3', 'key4' => 'value4']];
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get the node value for array key 0
$value = $node->getPropertyContent(identifier: 0)->getValue();
echo var_export($value, true) . PHP_EOL;
'value1'
Get the node value for array key 'key2'
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = [0 => 'value1', 'key2' => 'value2', 'key3' => [0 => 'value3', 'key4' => 'value4']];
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get the node value for array key 'key2'
$value = $node->getPropertyContent(identifier: 'key2')->getValue();
echo var_export($value, true) . PHP_EOL;
'value2'
Get all array keys of the first array level
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Processor\Encoder\Serialized\Node\ArrayElementNode;
$encoder = new SerializedEncoder();
$data = [0 => 'value1', 'key2' => 'value2', 'key3' => [0 => 'value3', 'key4' => 'value4']];
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get all array keys of the first array level
$value = array_map(fn(ArrayElementNode $elementNode): string|int => $elementNode->getPropertyName(), $node->getContent());
echo var_export($value, true) . PHP_EOL;
array (
0 => 0,
'key2' => 'key2',
'key3' => 'key3',
)
Replace 'value1' (key 0) with 'new value'
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = [0 => 'value1', 'key2' => 'value2', 'key3' => [0 => 'value3', 'key4' => 'value4']];
$node = $encoder->decode(data: serialize($data));
$originalSerializedData = $encoder->encode(data: $node);
echo PHP_EOL;
// replace 'value1' (key 0) with 'new value'
$newValue = 'new value';
$newValueNode = $encoder->decode(data: serialize($newValue));
$node->replaceProperty(identifier: 0, property: $newValueNode);
$newSerializedData = $encoder->encode(data: $node);
echo 'original data: ' . $originalSerializedData . PHP_EOL;
echo 'new data: ' . $newSerializedData . PHP_EOL;
original data: a:3:{i:0;s:6:"value1";s:4:"key2";s:6:"value2";s:4:"key3";a:2:{i:0;s:6:"value3";s:4:"key4";s:6:"value4";}}
new data: a:3:{i:0;s:9:"new value";s:4:"key2";s:6:"value2";s:4:"key3";a:2:{i:0;s:6:"value3";s:4:"key4";s:6:"value4";}}
Replace 'value4' (key 'key3' => 'key4') with 'newer value'
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
$encoder = new SerializedEncoder();
$data = [0 => 'value1', 'key2' => 'value2', 'key3' => [0 => 'value3', 'key4' => 'value4']];
$node = $encoder->decode(data: serialize($data));
$originalSerializedData = $encoder->encode(data: $node);
echo PHP_EOL;
// replace 'value4' (key 'key3' => 'key4') with 'newer value'
$newValue = 'newer value';
$newValueNode = $encoder->decode(data: serialize($newValue));
$node->getPropertyContent(identifier: 'key3')->replaceProperty(identifier: 'key4', property: $newValueNode);
$newSerializedData = $encoder->encode(data: $node);
echo 'original data: ' . $originalSerializedData . PHP_EOL;
echo 'new data: ' . $newSerializedData . PHP_EOL;
original data: a:3:{i:0;s:6:"value1";s:4:"key2";s:6:"value2";s:4:"key3";a:2:{i:0;s:6:"value3";s:4:"key4";s:6:"value4";}}
new data: a:3:{i:0;s:6:"value1";s:4:"key2";s:6:"value2";s:4:"key3";a:2:{i:0;s:6:"value3";s:4:"key4";s:11:"newer value";}}
Objects
The object looks like this:
class SimpleObject
{
private $privateMember;
protected $protectedMember;
public $publicMember;
public function __construct($privateMember, $protectedMember, $publicMember)
{
$this->privateMember = $privateMember;
$this->protectedMember = $protectedMember;
$this->publicMember = $publicMember;
}
}
Get the node value for class member 'privateMember' ('value1')
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
$encoder = new SerializedEncoder();
$data = new SimpleObject('value1', 'value2', 'value3');
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get the node value for class member 'privateMember' ('value1')
$value = $node->getPropertyContent(identifier: 'privateMember')->getValue();
echo var_export($value, true) . PHP_EOL;
'value1'
Get the node value for class member 'protectedMember' ('value2')
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
$encoder = new SerializedEncoder();
$data = new SimpleObject('value1', 'value2', 'value3');
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get the node value for class member 'protectedMember' ('value2)
$value = $node->getPropertyContent(identifier: 'protectedMember')->getValue();
echo var_export($value, true) . PHP_EOL;
'value2'
Get the node value for class member 'publicMember' ('value3')
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
$encoder = new SerializedEncoder();
$data = new SimpleObject('value1', 'value2', 'value3');
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get the node value for class member 'publicMember' ('value3)
$value = $node->getPropertyContent(identifier: 'publicMember')->getValue();
echo var_export($value, true) . PHP_EOL;
'value3'
Get all (direct) class member names
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Processor\Encoder\Serialized\Node\AttributeNode;
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
$encoder = new SerializedEncoder();
$data = new SimpleObject('value1', 'value2', 'value3');
$node = $encoder->decode(data: serialize($data));
echo PHP_EOL;
// get all (direct) class member names
$value = array_map(fn(AttributeNode $attributeNode): string => $attributeNode->getPropertyName(), $node->getContent());
echo var_export($value, true) . PHP_EOL;
array (
'privateMember' => 'privateMember',
'protectedMember' => 'protectedMember',
'publicMember' => 'publicMember',
)
Replace 'value3' ('publicMember') with 'newer value'
<?php
use Waldhacker\Pseudify\Core\Processor\Encoder\SerializedEncoder;
use Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject;
$encoder = new SerializedEncoder();
$data = new SimpleObject('value1', 'value2', 'value3');
$node = $encoder->decode(data: serialize($data));
$originalSerializedData = $encoder->encode(data: $node);
echo PHP_EOL;
// replace 'value3' ('publicMember') with 'newer value'
$newValue = 'newer value';
$newValueNode = $encoder->decode(data: serialize($newValue));
$node->replaceProperty(identifier: 'publicMember', property: $newValueNode);
$newSerializedData = $encoder->encode(data: $node);
echo 'original data: ' . $originalSerializedData . PHP_EOL;
echo 'new data: ' . $newSerializedData . PHP_EOL;
original data: O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObjectprivateMember";s:6:"value1";s:18:"*protectedMember";s:6:"value2";s:12:"publicMember";s:6:"value3";}
new data: O:86:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObject":3:{s:101:"Waldhacker\Pseudify\Core\Tests\Unit\Processor\Encoder\Serialized\Fixtures\SimpleObjectprivateMember";s:6:"value1";s:18:"*protectedMember";s:6:"value2";s:12:"publicMember";s:11:"newer value";}