The first thing that needs to be clarified is that Tesseract.js is not able to read PDF files, only images.
With that in mind, you need to trigger the processing of the file that is selected in the input. This is done by attaching a change event handler, which can be done in several ways:
In the same input element:
<!-- The attribute value is executed as code, so the handler must be invoked with (). -->
<input type="file" id="fileInput" onchange="procesarFichero()"/>
Attaching the handler to the input element from a script placed right after the element is rendered:
<input type="file" id="fileInput"/>
<script type="text/javascript">
// Look up the input rendered just above and subscribe to its change event.
(function () {
    var fileInput = document.getElementById('fileInput');
    fileInput.addEventListener("change", procesarFichero);
})();
</script>
Registering the handler is necessary, but what really matters is the function that handles the event, which should look like this:
/**
 * Change handler for the #fileInput element: runs OCR on the selected file.
 *
 * Does nothing unless exactly one file is selected. Progress messages and the
 * final result are written to the console; a failed recognition is reported
 * with console.error instead of being silently dropped.
 */
function procesarFichero() {
    var input = document.getElementById('fileInput');
    if (input == null || input.files == null || input.files.length !== 1) {
        return;
    }

    // file.name, file.size and file.type are available on this object if the
    // selection needs to be inspected before it is handed to Tesseract.
    var file = input.files[0];

    Tesseract.recognize(file).progress(function (message) {
        console.log('Progress is: ', message);
    }).then(function (data) {
        console.log(data);
    }, function (error) {
        // Second argument of then(): called when recognition fails.
        console.error('Text recognition failed: ', error);
    });
}
This way you will be using Tesseract.js correctly. For more information, see the Tesseract.js documentation.
Applying these changes to the HTML code you provided, it would look like this:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<script src="https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js"></script>
<label for="fileInput">
<div>Click this div and select a file</div>
</label>
<input type="file" id="fileInput" onchange="procesarFichero()"/>
<br />
<br />
<span>Text from image:</span>
<br />
<textarea id="remoseTextarea" rows="30" cols="50"></textarea>
<script>
// True while an OCR job started by procesarFichero() is still running.
var isWorking = false;

/**
 * Change handler for the #fileInput element: runs OCR on the selected file
 * and writes the recognized text into the #remoseTextarea element.
 *
 * Does nothing unless exactly one file is selected. While the job runs, the
 * controls are disabled through checkInputs(); they are re-enabled when the
 * job finishes, whether it succeeded or failed.
 */
function procesarFichero() {
    var input = document.getElementById('fileInput');

    // Validate the selection BEFORE locking the UI: otherwise a change event
    // without exactly one file would disable the controls with no job left
    // to re-enable them.
    if (input == null || input.files == null || input.files.length !== 1) {
        return;
    }
    var file = input.files[0];

    isWorking = true;
    checkInputs(isWorking);

    // Shared by the success and failure paths so the controls never stay locked.
    function unlock() {
        isWorking = false;
        checkInputs(isWorking);
    }

    Tesseract.recognize(file).progress(function (message) {
        console.log('Progress is: ', message);
    }).then(function (data) {
        console.log(data);
        document.getElementById('remoseTextarea').value = data.text;
        unlock();
    }, function (error) {
        // Second argument of then(): called when recognition fails.
        console.error('Text recognition failed: ', error);
        unlock();
    });
}
/**
 * Enables or disables the page controls in one step.
 *
 * @param {boolean} disableFields - value assigned to the `disabled` property
 *   of the file input and of the result textarea.
 */
function checkInputs(disableFields) {
    ['fileInput', 'remoseTextarea'].forEach(function (elementId) {
        document.getElementById(elementId).disabled = disableFields;
    });
}
</script>
</body>
</html>