I replace the image links in the text with the following format.
{#img='xxxx-xxxx-xxxx-xxxx.abc', alt=''}
Before replacing them, I take the src attribute from each image tag and download the image to the server using cURL. Each downloaded image is saved under a newly generated UUID.
All good so far!
// Generate a fresh name for the downloaded file (create_uuid() is defined elsewhere; presumably it returns a UUID string).
$newImageName = create_uuid();
// Prepare a cURL transfer for the URL found in the image's src attribute.
$ch = curl_init($img->getAttribute('src'));
// Open the destination file for binary writing; the extension is always '.jpg', whatever the source URL ends in.
$fp = fopen('/PATH_SAMPLE/' . $newImageName . '.jpg', 'wb');
// Write the response body straight into the file and keep the response headers out of it.
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_exec($ch);
// Release the cURL handle and the file handle.
curl_close($ch);
fclose($fp);
However, although each image gets a different UUID, after the replacement only the last UUID appears in the text, repeated for every image. For example:
asdasd {#img='19a1cb87-009b-4495-be22-68fb08db8a76', alt=''} asdasd {#img='19a1cb87-009b-4495-be22-68fb08db8a76', alt=''}
Full code:
// Sample input: text containing two <img> tags.
$jsonFile = "asdasd <img src='https://example.com/image_1.png'> asdasd <img src='https://example.com/image_1.jpg'>";
// Parse the text as HTML so the <img> elements can be enumerated.
$dom = new DOMDocument;
$dom->loadHTML($jsonFile);
$imgs = $dom->getElementsByTagName('img');
// NOTE(review): $imgURLs is never used in this snippet.
$imgURLs = [];
foreach ($imgs as $img) {
// Images without a src attribute have nothing to download.
if (!$img->hasAttribute('src')) {
continue;
} else {
// Generate a fresh name for this image (create_uuid() is defined elsewhere; presumably it returns a UUID string).
$newImageName = create_uuid();
// Download the image into '/PATH_SAMPLE/<name>.jpg'.
$ch = curl_init($img->getAttribute('src'));
$fp = fopen('/PATH_SAMPLE/' . $newImageName . '.jpg', 'wb');
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_exec($ch);
curl_close($ch);
fclose($fp);
// NOTE(review): this is the source of the "same UUID everywhere" symptom. The replacement is applied to the
// unmodified $jsonFile on every iteration and preg_replace() has no limit here, so ALL <img> tags are replaced
// with the current $newImageName. $str is then overwritten by the next iteration, so only the last name survives.
$str = preg_replace('/<img[^>]*src=([\'"])(.*?)\1>/', "{#img='" . $newImageName . "', alt=''}", $jsonFile);
}
}
How can I fix the UUID problem for images?
CodePudding user response:
// Make the extension part of the generated name, so $newImageName already ends in '.jpg'.
$newImageName = create_uuid().'.jpg';
This assumes, of course, that you're OK with incorrectly naming PNG files as .jpg.
By the way, instead of a UUID, may I suggest naming each file after a hash of its content? If you do
// Fetch the image into memory (as the return value of curl_exec()) instead of streaming it to a file handle.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$image_binary=curl_exec($ch);
if ($image_binary === false) {
    // Without this check a failed transfer would be hashed and stored as if it were an image.
    throw new RuntimeException('Image download failed: ' . curl_error($ch));
}
// Name the file after a hash of its content, so identical images always map to the same file name.
$image_name=hash('sha224',$image_binary).'.jpg';
$image_path = '/PATH_SAMPLE/' . $image_name;
// Only write the file when no file with this content hash has been stored yet.
if(!file_exists($image_path)){
    file_put_contents($image_path,$image_binary);
}
you'll avoid saving duplicate images and wasting disk space. (This approach can also be optimized so the entire image is not held in RAM: download into a tmpfile() via CURLOPT_FILE, hash it with hash_file(), and then rename() the path obtained from stream_get_meta_data($tmpfile)['uri'] — if you're really worried about memory usage.)
Also, drop this line:
$str = preg_replace('/<img[^>]*src=([\'"])(.*?)\1>/', "{#img='" . $newImageName . "', alt=''}", $jsonFile);
Just do $img->setAttribute("src",$image_name); instead,
and when you're ready to update $jsonFile, do $jsonFile=$dom->saveHTML();
Personally, I'd probably write it like this:
// Downloads every <img> referenced in an HTML fragment, stores each file under a name derived from the
// SHA-224 hash of its content (so identical images are stored once), and rewrites each src attribute
// to the stored file name. The rewritten fragment ends up in $str.
$jsonFile = "asdasd <img src='https://example.com/image_1.png'> asdasd <img src='https://example.com/image_1.jpg'>";
$dom = new DOMDocument();
// Wrap the fragment in a uniquely named root element so that exactly the fragment can be serialized
// again afterwards; the random suffix avoids clashing with any tag name in the input.
$rootName = "root" . bin2hex(random_bytes(10));
// The made-up root tag is not valid HTML, so let libxml collect its complaints instead of emitting warnings.
$previousLibxmlSetting = libxml_use_internal_errors(true);
// The leading XML declaration makes the parser treat the input as UTF-8.
$dom->loadHTML("<?xml encoding=\"UTF-8\"><{$rootName}>{$jsonFile}</{$rootName}>");
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
$imgs = $dom->getElementsByTagName('img');
// One cURL handle is reused for all downloads.
$ch = curl_init();
foreach ($imgs as $img) {
    if (!$img->hasAttribute('src')) {
        continue;
    }
    // Download into a temporary file so the image never has to be held in memory as a whole.
    $tmphandle = tmpfile();
    $tmpfile = stream_get_meta_data($tmphandle)['uri'];
    curl_setopt_array($ch, [
        CURLOPT_FILE => $tmphandle,
        CURLOPT_URL => $img->getAttribute('src'),
    ]);
    if (curl_exec($ch) === false) {
        // Download failed: do not hash/store a broken file, and leave the original src untouched.
        fclose($tmphandle);
        continue;
    }
    // Make sure everything cURL wrote is on disk before the file is read back by path.
    fflush($tmphandle);
    // optimization note: the file hashing could be done incrementally in-ram by using CURLOPT_WRITEFUNCTION hash_init() hash_update() instead of hash_file()
    $image_name = hash_file('sha224', $tmpfile) . '.jpg';
    $fp = '/PATH_SAMPLE/' . $image_name;
    if (!file_exists($fp)) {
        if (PHP_OS_FAMILY === "Windows") {
            // optimization note, on pretty much every OS except Windows, you can move files with open handles, but not on Windows..
            // this is slower, but Windows-compatible
            copy($tmpfile, $fp);
        } else {
            // this is faster, but not Windows-compatible
            rename($tmpfile, $fp);
        }
    }
    // Otherwise this is a duplicate image: nothing to store, the temporary file is simply discarded.
    // Close the HANDLE (the original passed the path string $tmpfile, which is a TypeError on PHP 8).
    fclose($tmphandle);
    $img->setAttribute("src", $image_name);
}
// Serialize only the wrapper element, then strip the wrapper's own opening and closing tags.
$str = $dom->saveHTML($dom->getElementsByTagName($rootName)->item(0));
$str = substr($str, strlen("<{$rootName}>"), -strlen("</{$rootName}>"));
curl_close($ch);