Process any amount of data. Any time.
Juozas Kaziukėnas // juokaz.com // @juokaz
Juozas Kaziukėnas, Lithuanian
You can call me Joe
More info http://juokaz.com
Last year at PHPBNL12
Planking competition
The problem
CRON
PHP developers are lazy
Code should be lazy
file_get_contents
Buffering
Wait
Memory usage
Saw graph
Memory vs performance
Reading files
Line-by-line reading
// Stream a file line by line so memory stays flat regardless of file size.
$file_handle = fopen("myfile", "r");
// fgets() returns false at EOF; testing its return value directly avoids the
// classic feof() pitfall of running one extra iteration with a false $line.
while (($line = fgets($file_handle)) !== false) {
    echo $line;
}
fclose($file_handle);
Line-by-line processing
// Process a file line by line — only one line is ever held in memory.
$file_handle = fopen("myfile", "r");
// fgets() returns false at EOF; checking it here prevents handing a
// spurious trailing `false` to process(), which a bare feof() loop can do.
while (($line = fgets($file_handle)) !== false) {
    process($line);
}
fclose($file_handle);
XML
Read XML files
// Stream a huge XML file: XMLReader walks the document node-by-node in
// constant memory, and each <product/> subtree is expanded into a DOM node
// (then SimpleXML) so it can be queried conveniently.
// NOTE(review): the slide extraction had fused the statements below onto
// comment lines, which made the code inert as written — reconstructed here.
$z = new XMLReader;
$z->open('data.xml');
$doc = new DOMDocument;

// Move to the first <product /> node.
while ($z->read() && $z->name !== 'product');

// Now that we're at the right depth, hop to the next <product/> until the end of the tree.
while ($z->name === 'product') {
    // Either one should work:
    //$node = new SimpleXMLElement($z->readOuterXML());
    $node = simplexml_import_dom($doc->importNode($z->expand(), true));

    // Now you can use $node without going insane about parsing.
    var_dump($node->element_1);

    // Go to next <product />.
    $z->next('product');
}
// Stream a huge XML file: XMLReader walks the document node-by-node in
// constant memory, and each <product/> subtree is expanded into a DOM node
// (then SimpleXML) so it can be queried conveniently.
// NOTE(review): the slide extraction had fused the statements below onto
// comment lines, which made the code inert as written — reconstructed here.
$z = new XMLReader;
$z->open('data.xml');
$doc = new DOMDocument;

// Move to the first <product /> node.
while ($z->read() && $z->name !== 'product');

// Now that we're at the right depth, hop to the next <product/> until the end of the tree.
while ($z->name === 'product') {
    // Either one should work:
    //$node = new SimpleXMLElement($z->readOuterXML());
    $node = simplexml_import_dom($doc->importNode($z->expand(), true));

    // Now you can use $node without going insane about parsing.
    var_dump($node->element_1);

    // Go to next <product />.
    $z->next('product');
}
Running processes
Pipes
Linux
wget http://example.com/lol.xml.gz
gunzip lol.xml.gz
Linux
wget -O- http://example.com/lol.xml.gz | gunzip
// Stream a remote gzipped file through `wget | gunzip` without ever writing
// it to disk: read the already-decompressed bytes off the child's stdout.
// escapeshellarg() is required — $url is interpolated into a shell command.
$command = "wget -O- " . escapeshellarg($url) . " | gunzip";
$process = proc_open($command, array(
    array("pipe", "r"), // stdin
    array("pipe", "w"), // stdout — gunzip output is read from here
    array("pipe", "w"), // stderr
), $pipes);

$buffer = ''; // was previously appended to without initialization
while (!feof($pipes[1])) {
    $buffer .= fgets($pipes[1], 128);
    // strpos() returns 0 (which is falsy) for a match at offset 0 —
    // always compare against false explicitly.
    if (strpos($buffer, '<Item>') !== false) {
        // detected start of the item
    }
    if (strpos($buffer, '</Item>') !== false) {
        // detected end of the item
    }
}
fclose($pipes[1]);
proc_close($process);
Reading from MySQL
mysql_unbuffered_query
// Unbuffered query: rows are fetched from the server one at a time instead
// of the whole result set being copied into PHP memory first.
// The legacy mysql_* extension (mysql_unbuffered_query) was removed in
// PHP 7.0; MYSQLI_USE_RESULT is the direct mysqli equivalent.
$lh  = mysqli_connect('server', 'uname', 'pword');
$qry = "SELECT * FROM my_bigass_table";
$rh  = mysqli_query($lh, $qry, MYSQLI_USE_RESULT); // unbuffered mode
while ($res = mysqli_fetch_row($rh))
{
    process($res);
}
Outputting data
PHP
// Buffered approach: assemble the entire report in a string first,
// then emit everything with a single echo at the end.
$result = '';
foreach ($data as $item) {
    $result .= 'Name ' . $item['name'] . PHP_EOL;
}
echo $result;
PHP
// Streaming approach: emit each line immediately so the client starts
// receiving data right away and PHP's memory use stays flat.
// ob_end_clean() raises a notice when no output buffer is active — guard it.
// (The unused `$result = '';` leftover from the buffered version is removed.)
if (ob_get_level() > 0) {
    ob_end_clean();
}
foreach ($data as $item) {
    echo 'Name ' . $item['name'] . PHP_EOL;
    flush(); // push this chunk through to the client now
}
PHP 5.5
Generators
Yield
Line-by-line processing
// Process a file line by line — only one line is ever held in memory.
$file_handle = fopen("myfile", "r");
// fgets() returns false at EOF; checking it here prevents handing a
// spurious trailing `false` to process(), which a bare feof() loop can do.
while (($line = fgets($file_handle)) !== false) {
    process($line);
}
fclose($file_handle);
Line-by-line processing
/**
 * Lazily read a file line by line using a generator — the file is never
 * held in memory as a whole, only the current line.
 *
 * @param string $filename Path to read. Defaults to "myfile" so existing
 *                         no-argument callers keep working.
 *
 * @return Generator Yields each line (including its trailing newline).
 */
function read($filename = "myfile") {
    $file_handle = fopen($filename, "r");
    // fgets() returns false at EOF; testing it directly avoids yielding a
    // spurious trailing `false` the way the original feof() loop could.
    while (($line = fgets($file_handle)) !== false) {
        yield $line;
    }
    fclose($file_handle);
}
// Consuming the generator looks exactly like iterating an array,
// but each line is produced on demand, not up front.
$data = read();
foreach ($data as $line) {
}
Making HTTP requests
Open connections limit
Connection Sharing with CURL in PHP: How to re-use HTTP connections to knock 70% off REST network time.http://technosophos.com/content/connection-sharing-curl-php-how-re-use-http-connections-knock-70-
Next steps
Gearman
Hadoop
Why PHP?
What’s possible
Using it for good
Thanks