Commit 02d2294f authored by Couture's avatar Couture
Browse files

Add PDFPlus module in MaarchCapture

parent 19c240c1
<?xml version="1.0" encoding="UTF-8"?>
<capture>
<!-- Capture configuration: registers the PDFPlus module and declares two batches that each run its ImportFiles function. -->
<modules>
<!-- The PDFPlus module is loaded as a class from modules/PDFPlus/PDFPlus.php. -->
<module name="PDFPlus" src="modules/PDFPlus/PDFPlus.php" type="class"/>
</modules>
<batches>
<!-- Batch PDF_PLUS_S: step Tache_S reads /opt/TestMaarchCapture/ and writes files prefixed with "out_" to /opt/TestMaarchCaptureOut/. -->
<batch directory="/opt/Tmp_MaarchCapture" id="{batchname}-{timestamp}-{rand}" lock="0" name="PDF_PLUS_S">
<!-- NOTE(review): maarchLogParam is an absolute, installation-specific path; adapt it to the target server. -->
<workflow debug="true" logMode="Maarch" maarchLogParam="/var/www/html/Expresso17/custom/cs_Demo_DIS/apps/maarch_entreprise/xml/log4php.xml" maarchLoggerName="loggerTechnique" name="PDF_PLUS">
<step function="ImportFiles" module="PDFPlus" name="Tache_S">
<input name="Directory_in">/opt/TestMaarchCapture/</input>
<input name="Directory_out">/opt/TestMaarchCaptureOut/</input>
<input name="Config_File">/opt/toto.ini</input>
<input name="Prefix">out_</input>
<input name="Extensions">pdf</input>
</step>
</workflow>
</batch>
<!-- Batch PDF_PLUS_M: step Tache_M takes the output directory of Tache_S as its input and writes files prefixed with "fin_" to /opt/TestMaarchCaptureOutFinal/. -->
<batch directory="/opt/Tmp_MaarchCapture" id="{batchname}-{timestamp}-{rand}" lock="0" name="PDF_PLUS_M">
<!-- Both batches use the same workflow name (PDF_PLUS) and the same logging settings. -->
<workflow debug="true" logMode="Maarch" maarchLogParam="/var/www/html/Expresso17/custom/cs_Demo_DIS/apps/maarch_entreprise/xml/log4php.xml" maarchLoggerName="loggerTechnique" name="PDF_PLUS">
<step function="ImportFiles" module="PDFPlus" name="Tache_M">
<input name="Directory_in">/opt/TestMaarchCaptureOut/</input>
<input name="Directory_out">/opt/TestMaarchCaptureOutFinal/</input>
<input name="Config_File">/opt/toto2.ini</input>
<input name="Prefix">fin_</input>
<input name="Extensions">pdf</input>
</step>
</workflow>
</batch>
</batches>
</capture>
\ No newline at end of file
<?php
/**
 * MaarchCapture module that transfers files from an input directory to an
 * output directory: each accepted file is copied under an optional prefix and
 * the source file is then removed.
 *
 * NOTE(review): the log line written for each file mentions a "pdfplus"
 * command, but this class never launches an external process; it only copies
 * files. Confirm whether the actual pdfplus call is still to be implemented.
 */
class PDFPlus extends DOMXPath
{
    /** Batch object taken from $_SESSION['capture'] at construction time. */
    private $Batch;
    /** Output directory, normalised to end with a directory separator. */
    private $Directory_out;
    /** Prefix prepended to every output file name. */
    private $Prefix;
    /** Path of the configuration file (only checked for existence and logged). */
    private $Config_File;
    /** Whether sub-directories are scanned. */
    private $Recursive = false;
    /** Whether a folder node is added to the parent for each scanned directory. */
    private $CreateFolders = false;
    /** Accepted file extensions; an empty list accepts every file. */
    private $Extensions = array();
    /** Maximum number of sub-directories scanned per directory level. */
    private $NbMaxFoldersToImport = 0;
    /** Whether scanned sub-directories are removed after the scan. */
    private $DeleteSubFolders = false;
    /** Sub-directories visited during the current scan, in discovery order. */
    private $SubFoldersToDel = array();
    /**
     * What discard() does with a rejected file: 'move', 'delete' or 'none'.
     * Nothing in this class changes it, so rejected files are left in place.
     */
    private $Action = 'none';
    /** Target directory used by discard() when $Action is 'move'. */
    private $MoveDirectory = '';

    /**
     * Binds the module to the current batch and initialises the parent
     * DOMXPath on an empty document (no module configuration is loaded).
     */
    public function __construct()
    {
        $this->Batch = $_SESSION['capture']->Batch;
        $Config = new DOMDocument();
        parent::__construct($Config);
    }

    /**
     * Scans $Directory_in and transfers every accepted file to $Directory_out.
     *
     * @param string       $Directory_in         Directory to scan.
     * @param string       $Directory_out        Directory receiving the copies.
     * @param string       $Config_File          Configuration file; must exist.
     * @param string       $Prefix               Prefix added to output file names.
     * @param array|string $Extensions           Accepted extensions, as an array or
     *                                           a comma-separated string.
     * @param bool         $Recursive            Scan sub-directories.
     * @param bool         $CreateFolders        Add a folder node per directory.
     * @param int          $NbMaxFoldersToImport Maximum number of sub-directories
     *                                           scanned per directory level.
     * @param bool         $DeleteSubFolders     Remove scanned sub-directories.
     *
     * @return mixed The value returned by ScanDirectory() (currently nothing).
     */
    public function ImportFiles(
        $Directory_in,
        $Directory_out,
        $Config_File,
        $Prefix = "",
        $Extensions = array(),
        $Recursive = true,
        $CreateFolders = false,
        $NbMaxFoldersToImport = 0,
        $DeleteSubFolders = false
    ) {
        // The output path is built by plain concatenation, so make sure the
        // directory ends with exactly one separator.
        if ($Directory_out !== '' && $Directory_out !== null) {
            $Directory_out = rtrim($Directory_out, '/' . DIRECTORY_SEPARATOR)
                . DIRECTORY_SEPARATOR;
        }
        $this->Directory_out = $Directory_out;
        $this->Prefix = $Prefix;
        $this->Config_File = $Config_File;
        $this->Recursive = $Recursive;
        $this->CreateFolders = $CreateFolders;

        // Accept "pdf, tif" as well as "pdf,tif" and ignore empty items so
        // that an empty string means "no filter".
        if (!is_array($Extensions)) {
            $Extensions = explode(",", $Extensions);
        }
        $cleanExtensions = array();
        foreach ($Extensions as $extension) {
            $extension = trim((string) $extension);
            if ($extension !== '') {
                $cleanExtensions[] = $extension;
            }
        }
        $this->Extensions = $cleanExtensions;

        $this->NbMaxFoldersToImport = $NbMaxFoldersToImport;
        $this->DeleteSubFolders = $DeleteSubFolders;
        // Start from a clean list so a reused instance never tries to delete
        // folders recorded by a previous call.
        $this->SubFoldersToDel = array();

        $_SESSION['capture']->logEvent(
            "Scanning directory $Directory_in for file import..."
        );
        $_SESSION['capture']->logEvent(
            "Config_File = $Config_File"
        );
        $_SESSION['capture']->logEvent(
            "Extensions = " . print_r($this->Extensions, true)
        );

        if (!file_exists($Config_File)) {
            $_SESSION['capture']->logEvent(
                "Unable to open configuration file '$Config_File' !", 2
            );
            trigger_error(
                "Unable to open configuration file '$Config_File' !",
                E_USER_ERROR
            );
            // Only reached when a custom error handler resumes execution.
            return;
        }

        $result = $this->ScanDirectory($Directory_in, $this->Batch);

        if ($DeleteSubFolders) {
            // Parents are recorded before their children, so remove in
            // reverse order: a directory can only be removed once empty.
            for ($i = count($this->SubFoldersToDel) - 1; $i >= 0; $i--) {
                $_SESSION['capture']->logEvent(
                    "Delete directory " . $this->SubFoldersToDel[$i]
                );
                if (!rmdir($this->SubFoldersToDel[$i])) {
                    $_SESSION['capture']->logEvent(
                        "Unable to delete directory " . $this->SubFoldersToDel[$i]
                    );
                }
            }
        }

        return $result;
    }

    /**
     * Processes one directory: recurses into sub-directories when requested,
     * rejects files whose extension is filtered out, and copies every other
     * file to the output directory before removing the source.
     *
     * @param string $Directory Directory to scan.
     * @param object $Parent    Node receiving the folder node when
     *                          CreateFolders is enabled.
     */
    public function ScanDirectory(
        $Directory,
        $Parent
    ) {
        $dirhdl = opendir($Directory);
        if (!$dirhdl) {
            $_SESSION['capture']->logEvent(
                "Unable to open directory '$Directory' !", 2
            );
            trigger_error(
                "Unable to open directory '$Directory' !",
                E_USER_ERROR
            );
            // Only reached when a custom error handler resumes execution;
            // there is no handle to read from.
            return;
        }

        if ($this->CreateFolders) {
            $_SESSION['capture']->logEvent(
                "Adding Folder with path '$Directory'"
            );
            $Container = $Parent->addFolder($Directory);
        } else {
            $Container = $Parent;
        }

        $nbDir = 0;
        // Compare against false explicitly: an entry named "0" is falsy and
        // would otherwise end the loop early.
        while (false !== ($entry_name = readdir($dirhdl))) {
            if ($entry_name == '.' || $entry_name == '..') {
                continue;
            }
            $entry_path = $Directory . DIRECTORY_SEPARATOR . $entry_name;

            // Sub-folder: process recursively if requested and within limit.
            if (is_dir($entry_path)) {
                if ($this->Recursive) {
                    $nbDir++;
                    // Folders beyond the limit are skipped, but the scan goes
                    // on so the plain files of this directory are still
                    // handled whatever order readdir() returns them in.
                    if ($nbDir <= $this->NbMaxFoldersToImport) {
                        $this->SubFoldersToDel[] = $entry_path;
                        $this->ScanDirectory($entry_path, $Container);
                    }
                }
                continue;
            }

            // Extension filter; the extension is taken from the file name
            // only, so dots in directory names cannot interfere.
            if (count($this->Extensions) > 0) {
                $entry_ext = pathinfo($entry_name, PATHINFO_EXTENSION);
                if (!in_array($entry_ext, $this->Extensions, true)) {
                    $this->discard($entry_path, $entry_name);
                    continue;
                }
            }

            $_SESSION['capture']->logEvent(
                "pdfplus '$entry_path' -config" . $this->Config_File
            );

            $target_path = $this->Directory_out . $this->Prefix . $entry_name;
            if (!copy($entry_path, $target_path)) {
                // Keep the source file: deleting it now would lose the data.
                $_SESSION['capture']->logEvent(
                    "Unable to copy '$entry_path' to '$target_path' !", 2
                );
                continue;
            }
            // NOTE(review): random 1-20 s pause kept from the original code;
            // it looks like a stand-in for the pdfplus processing time.
            sleep(rand(1, 20));
            unlink($entry_path);
        }
        closedir($dirhdl);
    }

    /**
     * Applies the configured action ('move', 'delete' or 'none') to a file
     * rejected by the extension filter.
     *
     * @param string $entry_path Full path of the rejected file.
     * @param string $entry_name File name of the rejected file.
     */
    public function discard(
        $entry_path,
        $entry_name
    ) {
        switch ($this->Action) {
            case 'move':
                $_SESSION['capture']->logEvent(
                    "Moving imported document to directory " . $this->MoveDirectory
                );
                rename(
                    $entry_path,
                    $this->MoveDirectory . DIRECTORY_SEPARATOR . $entry_name
                );
                break;
            case 'delete':
                $_SESSION['capture']->logEvent(
                    "Deleting imported document"
                );
                unlink($entry_path);
                break;
            case 'none':
            default:
                // Leave the file where it is.
                break;
        }
    }
}
?>
\ No newline at end of file
***********************
Version
***********************
1.0
- MaarchCapture module that moves PDF files from one directory to another. The PDFPlus process is executed for each file: barcode-based separation (S task) and conversion of image PDF files to text PDF files (M task).
***********************
Configuration file
***********************
Each task accepts 5 attributes:
- Directory_in : Input directory
- Directory_out : Output directory
- Config_File : pdfplus process configuration file
- Prefix (optional): prefix added to the output file name
- Extensions (optional): filter files by extension (separated by comma)
\ No newline at end of file
#!/bin/sh
# Launches the PDF_PLUS_M batch of MaarchCapture, allowing only one run at a time.
# Usage: <script> <MaarchCapture directory>
# A flag file acts as the lock; if it already exists the script exits silently.

FLAG=/opt/.flag_M

# With no argument, fall back to $HOME, which is what a bare `cd` does.
cd "${1:-$HOME}" || exit 1

# Create the flag atomically: with noclobber (set -C) the redirection fails if
# the file already exists, so two concurrent launches cannot both get the lock.
if ! (set -C; : > "$FLAG") 2>/dev/null; then
    # Flag present: another run owns the lock.
    [ -e "$FLAG" ] && exit 0
    echo "Unable to create lock file $FLAG" >&2
    exit 1
fi

# Always release the lock, including when the script is interrupted, so a
# killed run cannot block every later run. The batch is stopped as well so it
# does not keep running without a lock.
trap 'rm -f "$FLAG"' EXIT
trap 'kill "${php_pid:-}" 2>/dev/null; exit 1' HUP INT TERM

# Run in the background and wait, so the signal traps above fire immediately.
php MaarchCapture.php init -BatchName PDF_PLUS_M &
php_pid=$!
wait
\ No newline at end of file
#!/bin/sh
# Launches the PDF_PLUS_S batch of MaarchCapture, allowing only one run at a time.
# Usage: <script> <MaarchCapture directory>
# A flag file acts as the lock; if it already exists the script exits silently.

FLAG=/opt/.flag_S

# With no argument, fall back to $HOME, which is what a bare `cd` does.
cd "${1:-$HOME}" || exit 1

# Create the flag atomically: with noclobber (set -C) the redirection fails if
# the file already exists, so two concurrent launches cannot both get the lock.
if ! (set -C; : > "$FLAG") 2>/dev/null; then
    # Flag present: another run owns the lock.
    [ -e "$FLAG" ] && exit 0
    echo "Unable to create lock file $FLAG" >&2
    exit 1
fi

# Always release the lock, including when the script is interrupted, so a
# killed run cannot block every later run. The batch is stopped as well so it
# does not keep running without a lock.
trap 'rm -f "$FLAG"' EXIT
trap 'kill "${php_pid:-}" 2>/dev/null; exit 1' HUP INT TERM

# Run in the background and wait, so the signal traps above fire immediately.
php MaarchCapture.php init -BatchName PDF_PLUS_S &
php_pid=$!
wait
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment