Add-On Extraction of PDF properties
Information
Author: Sacha MPS
License: FPDF
Description
This function extracts the document properties from a PDF file. It works fine with documents generated by FPDF. It can also work with PDFs made by other generators, but not all of them.
The get_pdf_prop() function returns an associative array whose keys are the names of
the properties found in the file (such as Author, CreationDate, Creator, Keywords, Producer,
Subject and Title).
Source
<?php
/**
 * Extracts the document properties (the Info dictionary) from a PDF file.
 *
 * The function looks for a classic cross-reference table and its trailer in
 * the last 16KB of the file, resolves the byte offset of the Info object
 * from that table and parses the literal-string entries of its dictionary.
 *
 * Limitations:
 * - only "/Name (value)" entries are recognised;
 * - values are returned raw (escape sequences are not decoded) and a value
 *   is cut at its first closing parenthesis;
 * - only the first 1024 bytes of the Info object are examined.
 *
 * @param string $file Path of the PDF file to read.
 * @return array|false Associative array (property name => raw value), or
 *                     false if the file cannot be opened or parsed.
 */
function get_pdf_prop($file)
{
    $f = fopen($file, 'rb');
    if(!$f)
        return false;

    //Read the last 16KB, or the whole file when it is shorter than that
    //(seeking before the start of the file fails, so rewind explicitly)
    if(fseek($f, -16384, SEEK_END) != 0)
        rewind($f);
    $s = fread($f, 16384);

    //Extract cross-reference table and trailer
    if(!is_string($s) || !preg_match("/xref[\r\n]+(.*)trailer(.*)startxref/s", $s, $a))
    {
        fclose($f);
        return false;
    }
    $xref = $a[1];
    $trailer = $a[2];

    //Extract Info object number
    if(!preg_match('/Info ([0-9]+) /', $trailer, $a))
    {
        fclose($f);
        return false;
    }
    $object_no = (int)$a[1];

    //Extract Info object offset.
    //The table is made of subsections: a header line "<first object> <count>"
    //followed by one "<10-digit offset> <5-digit generation> <n|f>" entry per
    //object. Tracking the headers lets the lookup work even when the table
    //does not consist of a single subsection starting at object 0.
    $offset = 0;
    $current = -1; //Object number of the next entry; -1 until a header is seen
    foreach(preg_split("/[\r\n]+/", $xref) as $line)
    {
        if(preg_match('/^\s*([0-9]+) +([0-9]+)\s*$/', $line, $m))
        {
            $current = (int)$m[1];
        }
        elseif($current >= 0 && preg_match('/^\s*([0-9]{10}) +[0-9]{5} +([nf])/', $line, $m))
        {
            //Only in-use entries carry a byte offset; keep the last match so
            //that a more recent table found in the window takes precedence
            if($current == $object_no && $m[2] == 'n')
                $offset = (int)$m[1];
            $current++;
        }
    }
    if($offset == 0)
    {
        fclose($f);
        return false;
    }

    //Read Info object
    fseek($f, $offset, SEEK_SET);
    $s = fread($f, 1024);
    fclose($f);

    //Extract properties
    if(!is_string($s) || !preg_match('/<<(.*)>>/Us', $s, $a))
        return false;
    $n = (int)preg_match_all('|/([a-z]+) ?\((.*)\)|Ui', $a[1], $m);
    $prop = array();
    for($i=0; $i<$n; $i++)
        $prop[$m[1][$i]] = $m[2][$i];
    return $prop;
}
?>