| | |
| | | num_level: int - 列表序号级别,0表示正文 |
| | | num: int - 列表序号,如果是列表 |
| | | children: typing.List[ParagraphInfo] - 子段落列表 |
| | | refs: 引用文档 |
| | | refs: 引用列表 |
| | | entities: 实体词列表 |
| | | is_toc: 是否是目录标志 |
| | | """ |
| | | id: int |
| | | text: str |
| | | title_level: int |
| | | title_num: str |
| | |
| | | children: typing.List |
| | | refs: typing.List |
| | | entities: typing.List[TEntity] |
| | | is_toc: bool |
| | | |
@property
def title(self):
    """Return the heading line of this paragraph.

    When ``title_level`` is truthy, the result is the part of
    ``full_text`` that precedes the first newline. If ``full_text``
    contains no newline at all, the whole text is returned (the former
    ``str.index`` lookup raised ``ValueError`` in that case).

    When ``title_level`` is falsy, an empty string is returned.
    """
    if not self.title_level:
        return ''
    # partition() never raises: without a newline the head is the full string.
    heading, _, _ = self.full_text.partition('\n')
    return heading
| | | |
| | | @property |
| | | def full_text(self): |
| | |
| | | full_text = full_text + "\n" + child.full_text_with_children |
| | | return full_text |
| | | |
def __init__(self, text: str, title_level: int, num=0, num_level=0, is_toc=False):
    """
    Paragraph information.

    text: str - paragraph text, stored unchanged
    title_level: int - paragraph level; 1-9 for heading levels, 0 for body text
    num: int - list item number
    num_level: int - list numbering level
    is_toc: bool - whether the paragraph is a table of contents
    """
    self.text = text
    self.title_level = title_level
    # Keep the list number on the instance; it is accepted and documented
    # alongside num_level, so it must not be silently discarded.
    self.num = num
    self.num_level = num_level
    # Fresh containers per instance so paragraphs never share state.
    self.children: typing.List[ParagraphInfo] = []
    self.entities: typing.List[TEntity] = []
    self.refs: typing.List[ParagraphInfo] = []
    self.is_toc = is_toc
    # Starts at 0; presumably assigned by whoever builds the document tree
    # (TODO confirm against callers).
    self.id = 0
| | | |
def __str__(self):
    """Render the paragraph as the value of its ``full_text`` property."""
    rendered = self.full_text
    # format() with no spec is exactly what an f-string placeholder does.
    return format(rendered)