Skip to content

passage

autocorpus.ac_bioc.passage ¤

This module defines the BioCPassage class.

BioCPassage extends the BioC passage to include additional functionality for handling data, such as column headings and data sections.

Classes¤

BioCPassage(text=str(), offset=int(), infons=dict(), sentences=list(), annotations=list(), relations=list()) dataclass ¤

Bases: DataClassJsonMixin

Represents a passage in a BioC document.

Functions¤
from_ac_dict(passage) classmethod ¤

Create a BioCPassage from a passage dict (the offset is read from the dict and defaults to 0).

Parameters:

Name Type Description Default
passage dict[str, Any]

dict containing info about passage

required

Returns:

Type Description
BioCPassage

BioCPassage object

Source code in autocorpus/ac_bioc/passage.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
@classmethod
def from_ac_dict(cls, passage: dict[str, Any]) -> BioCPassage:
    """Build a BioCPassage from a passage dict.

    Every entry of ``passage`` whose key is not in ``_DEFAULT_KEYS`` is copied
    into the infons unchanged. Section headings and section types are then
    stored under numbered infon keys.

    Args:
        passage: dict describing the passage. ``"body"`` and
            ``"section_type"`` are read unconditionally; ``"offset"``,
            ``"section_heading"`` and ``"subsection_heading"`` are optional.

    Returns:
        BioCPassage whose text is ``passage["body"]`` and whose offset is
        ``passage["offset"]`` (0 when the key is absent).
    """
    infons = {}
    for key, value in passage.items():
        if key not in _DEFAULT_KEYS:
            infons[key] = value

    # TODO: Doesn't account for subsubsection headings which might exist
    title_keys = (
        ("section_heading", "section_title_1"),
        ("subsection_heading", "section_title_2"),
    )
    for source_key, infon_key in title_keys:
        heading = passage.get(source_key)
        # Missing or empty headings are left out of the infons.
        if heading:
            infons[infon_key] = heading

    # Section types are numbered from 1 in the infon keys.
    for position, section in enumerate(passage["section_type"], start=1):
        infons[f"iao_name_{position}"] = section["iao_name"]
        infons[f"iao_id_{position}"] = section["iao_id"]

    offset = passage.get("offset", 0)
    body = passage["body"]
    return cls(offset=offset, infons=infons, text=body)
from_title(title, offset) classmethod ¤

Create a BioCPassage from a title and offset.

Parameters:

Name Type Description Default
title str

Passage title

required
offset int

Passage offset

required

Returns:

Type Description
BioCPassage

BioCPassage object

Source code in autocorpus/ac_bioc/passage.py
58
59
60
61
62
63
64
65
66
67
68
69
70
@classmethod
def from_title(cls, title: str, offset: int) -> BioCPassage:
    """Build the BioCPassage that holds a document title.

    The passage is tagged with the fixed infons ``iao_name_1`` =
    ``"document title"`` and ``iao_id_1`` = ``"IAO:0000305"``.

    Args:
        title: Text of the title, used as the passage text.
        offset: Offset to store on the passage.

    Returns:
        BioCPassage carrying the title text, the offset and the title infons.
    """
    title_infons = dict(
        iao_name_1="document title",
        iao_id_1="IAO:0000305",
    )
    return cls(text=title, infons=title_infons, offset=offset)
from_xml(elem) classmethod ¤

Create a BioCPassage instance from an XML element.

Parameters:

Name Type Description Default
elem Element

An XML element representing a passage.

required

Returns:

Name Type Description
BioCPassage BioCPassage

An instance of BioCPassage populated with the provided XML data.

Source code in autocorpus/ac_bioc/passage.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
@classmethod
def from_xml(cls, elem: ET.Element) -> BioCPassage:
    """Create a BioCPassage instance from an XML element.

    Reads the direct ``<offset>``, ``<text>``, ``<infon>`` and ``<sentence>``
    children of ``elem``. A missing or blank ``<offset>`` yields offset 0 and
    a missing ``<text>`` yields an empty string. ``<infon>`` elements without
    text are skipped.

    Args:
        elem (ET.Element): An XML element representing a passage.

    Returns:
        BioCPassage: An instance of BioCPassage populated with the provided XML data.

    Raises:
        ValueError: If ``<offset>`` contains text that is not an integer.
        KeyError: If an ``<infon>`` with text has no ``key`` attribute.
    """
    # findtext() only falls back to the default when the element is absent;
    # an element that is present but empty gives "", which int() rejects.
    offset_text = elem.findtext("offset", default="0").strip()
    offset = int(offset_text) if offset_text else 0
    text = elem.findtext("text", default="")

    infons = {
        e.attrib["key"]: e.text for e in elem.findall("infon") if e.text is not None
    }

    sentences = [
        BioCSentence.from_xml(s_elem) for s_elem in elem.findall("sentence")
    ]

    return cls(
        text=text,
        offset=offset,
        infons=infons,
        sentences=sentences,
    )
to_xml() ¤

Convert the BioCPassage instance to an XML element.

Returns:

Type Description
Element

ET.Element: An XML element representation of the BioCPassage instance.

Source code in autocorpus/ac_bioc/passage.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def to_xml(self) -> ET.Element:
    """Convert the BioCPassage instance to an XML element.

    The ``<passage>`` element receives, in order: one ``<infon>`` per entry
    of ``self.infons`` (key stored in the ``key`` attribute), an ``<offset>``,
    a ``<text>``, and the element returned by ``to_xml()`` of each sentence.

    Returns:
        ET.Element: An XML element representation of the BioCPassage instance.
    """
    passage_elem = ET.Element("passage")

    for key, value in self.infons.items():
        infon = ET.SubElement(passage_elem, "infon", {"key": key})
        # ElementTree can only serialise str (or None) text; infon values
        # may be other types, so stringify them as is done for the offset.
        if value is None or isinstance(value, str):
            infon.text = value
        else:
            infon.text = str(value)

    offset_elem = ET.SubElement(passage_elem, "offset")
    offset_elem.text = str(self.offset)

    text_elem = ET.SubElement(passage_elem, "text")
    text_elem.text = self.text

    for sentence in self.sentences:
        passage_elem.append(sentence.to_xml())

    return passage_elem