I need to find different words and small sentences of up to 5 words long (or could be customizable) from one text to another. The differences I want to keep in an array, so that only the words or phrases are saved without repeating (it is checked beforehand if it already exists). At the moment I have managed to find different words for me, but I do not know how to make that now I keep looking for phrases of 2 words (consecutive), in the next iteration of 3 words, 4 ... and so on
var texto = "Aquí va el el primer texto a analizar e incluso con palabras repetidas repetidas";
var texto2 = "Aquí va el segundo texto a analizar analizar e incluso con algunas palabras repetidas";
var palabras = texto.split(" ");
var palabras2 = texto2.split(" ");
var diferentes = [];
var max_palabras = 3;
var max_busquedas = 50; //hasta que diferentes.length>=50
var cont_palabras = 0;
var frase = "";
var newStr = "";
var addWord = false;
do {
for (var i=0; i<palabras.length; i++) {
console.log("añade palabra = "+addWord);
if (texto2.includes(palabras[i]) == false) {
if (diferentes.includes(palabras[i]) == false) { //si no existe en el array de diferencias
diferentes.push(palabras[i]);
}
}
if (cont_palabras < 1) {
frase = palabras[i];
console.log("FRASE = "+frase);
}
if (cont_palabras >= 1 && addWord == true) {
frase = frase + " " + palabras[i+cont_palabras];
addWord = false;
console.log("FRASE = "+frase);
console.log("Nueva frase construida (cont = "+ (cont_palabras+1) +") = "+ frase);
}
}
cont_palabras++;
addWord = true;
} while (cont_palabras <= max_palabras);
for (var i=0; i<palabras2.length; i++) {
if (texto.includes(palabras2[i]) == false) {
if (diferentes.includes(palabras2[i]) == false) { //si no existe en el array de diferencias
diferentes.push(palabras2[i]);
}
}
}
for (var j=0; j<diferentes.length; j++) {
console.log("Elemento "+ j + " diferente en ambos textos = "+diferentes[j])
}
How can I make each time I pick up one more word from the text1 and analyzing if it exists in the text2, always looking for all the possible sentences (but the words must be consecutive)
PD. I've updated the code again with some progress, but now I do not know how to make the text use 2 in 2 words, 3 in 3 ... etc