This XML file contains the parse of a document as a tree of constituents, whose nodes can be nested (sub-trees of any depth).
<document>
<wordcount>181</wordcount>
<cputime>0.084678</cputime>
<paragraph>
<sentence id="1">
<token begin="4" ctag="DA" end="6" form="El" gen="masculine" id="t1.1" lemma="el" num="singular" phon="el" pos="determiner" tag="DA0MS0" type="article">
<morpho>
<analysis ctag="DA" gen="masculine" lemma="el" num="singular" pos="determiner" selected="1" tag="DA0MS0" type="article"/>
</morpho>
</token>
<constituents>
<node head="1" label="grup-verb">
<node label="sn">
<node label="espec-ms">
<node head="1" label="j-ms">
<node head="1" leaf="1" token="t1.1" word="El"/></node>
</node>
<node head="1" label="grup-nom-ms">
<node head="1" label="n-ms">
<node head="1" leaf="1" token="t1.2" word="doctor"/></node>
<node label="w-ms">
<node head="1" leaf="1" token="t1.3" word="Fergusson"/></node>
</node>
</node>
</node>
</constituents>
</sentence>
</paragraph>
</document>
I have defined the following class structure to mirror the structure of the XML (generated by FreeLing in CoNLL format).
/// <summary>
/// Container for the repeated <c>sense</c> child elements.
/// </summary>
public class Senses
{
    /// <summary>One entry per <c>sense</c> child element.</summary>
    [XmlElement("sense")]
    public List<Sense> Sense { get; set; }
}
/// <summary>
/// A <c>node</c> element of the constituent tree. A node carries its
/// attributes as strings and may contain further nested <c>node</c> elements.
/// </summary>
[XmlRoot("node")]
public class Node
{
    /// <summary>Value of the <c>head</c> attribute.</summary>
    [XmlAttribute("head")]
    public string Head { get; set; }

    /// <summary>Value of the <c>leaf</c> attribute.</summary>
    [XmlAttribute("leaf")]
    public string Leaf { get; set; }

    /// <summary>Value of the <c>token</c> attribute.</summary>
    [XmlAttribute("token")]
    public string Token { get; set; }

    /// <summary>Value of the <c>word</c> attribute.</summary>
    [XmlAttribute("word")]
    public string Word { get; set; }

    /// <summary>Nested <c>node</c> child elements (the sub-trees of this node).</summary>
    [XmlElement("node")]
    public List<Node> Nodo { get; set; }

    /// <summary>Value of the <c>label</c> attribute.</summary>
    [XmlAttribute("label")]
    public string Label { get; set; }
}
/// <summary>
/// The <c>constituents</c> element, which holds a single <c>node</c> child.
/// </summary>
[XmlRoot("constituents")]
public class Constituents
{
    /// <summary>The <c>node</c> child element.</summary>
    [XmlElement("node")]
    public Node Node { get; set; }
}
/// <summary>
/// A <c>sentence</c> element: its <c>token</c> children, its
/// <c>constituents</c> child and its <c>id</c> attribute.
/// </summary>
[XmlRoot("sentence")]
public class Sentence
{
    /// <summary>One entry per <c>token</c> child element.</summary>
    [XmlElement("token")]
    public List<Token> Token { get; set; }

    /// <summary>The <c>constituents</c> child element.</summary>
    [XmlElement("constituents")]
    public Constituents Constituents { get; set; }

    /// <summary>Value of the <c>id</c> attribute.</summary>
    [XmlAttribute("id")]
    public string Id { get; set; }
}
/// <summary>
/// A <c>paragraph</c> element, holding its <c>sentence</c> children.
/// </summary>
[XmlRoot("paragraph")]
public class Paragraph
{
    /// <summary>One entry per <c>sentence</c> child element.</summary>
    [XmlElement("sentence")]
    public List<Sentence> Sentence { get; set; }
}
When loading the XML file with an enumerator and LINQ, the compiler reports the error "Cannot implicitly convert type 'System.Collections.Generic.IEnumerable' to 'Project.Node'. An explicit conversion exists (are you missing a cast?)". I have tried adding a ToList() conversion, but the error persists. The loading code is shown below. Thank you in advance for the help.
// Parses the XML text and maps every <paragraph> element (with its sentences,
// tokens and constituent tree) onto the Paragraph/Sentence/Token/Node classes.
XDocument docuXML = XDocument.Parse(TxtCodificado);
// Loads the file and recognizes accented Spanish vowels and symbols. The language
// of the text must be anticipated (the original comment is cut off at this point).
archivoXML.LoadXml(TxtCodificado);

// Maps one <node> element and, recursively, every nested <node> below it, so
// trees of any depth are preserved. The delegate is declared before it is
// assigned so that the lambda can refer to itself.
System.Func<XElement, Node> construirNodo = null;
construirNodo = elemento => new Node
{
    Head = (string)elemento.Attribute("head"),
    Leaf = (string)elemento.Attribute("leaf"),
    Token = (string)elemento.Attribute("token"),
    Word = (string)elemento.Attribute("word"),
    Label = (string)elemento.Attribute("label"),
    // A node without <node> children gets an empty list.
    Nodo = elemento.Elements("node")
        .Select(hijo => construirNodo(hijo))
        .ToList()
};

IEnumerable<Paragraph> Parrafos = docuXML.Root.Descendants("paragraph")
    .Select(Parrafs => new Paragraph
    {
        Sentence = Parrafs.Elements("sentence")
            .Select(Sentencs => new Sentence
            {
                Id = (string)Sentencs.Attribute("id"),
                Token = Sentencs.Elements("token")
                    .Select(complex => new Token
                    {
                        Begin = (string)complex.Attribute("begin"),
                        End = (string)complex.Attribute("end"),
                        Form = (string)complex.Attribute("form"),
                        Gen = (string)complex.Attribute("gen"),
                        Id = (string)complex.Attribute("id"),
                        Case = (string)complex.Attribute("case"),
                        Ctag = (string)complex.Attribute("ctag"),
                        Lemma = (string)complex.Attribute("lemma"),
                        Person = (string)complex.Attribute("person"),
                        Phon = (string)complex.Attribute("phon"),
                        Pos = (string)complex.Attribute("pos"),
                        Tag = (string)complex.Attribute("tag"),
                        Type = (string)complex.Attribute("type"),
                    }).ToList(),
                // Sentence.Constituents and Constituents.Node are single objects,
                // not collections, so take the first match (null when the element
                // is absent) instead of assigning a sequence or a list.
                Constituents = Sentencs.Elements("constituents")
                    .Select(constitu => new Constituents
                    {
                        Node = constitu.Elements("node")
                            .Select(raiz => construirNodo(raiz))
                            .FirstOrDefault()
                    })
                    .FirstOrDefault()
            }).ToList()
    });
return Parrafos.ToList();