52 lines
2.1 KiB
Python
52 lines
2.1 KiB
Python
|
import os
|
|||
|
import xml.etree.ElementTree as ET
|
|||
|
|
|||
|
def remove_tag_from_element(element, tag):
    """Remove every descendant of ``element`` whose tag equals ``tag``.

    The tree is modified in place. A matching node is removed together with
    its whole subtree, so nothing below it is visited. ``element`` itself is
    never removed, even when its own tag matches.

    ElementTree keeps the text that follows a node in that node's ``tail``,
    so a plain ``remove()`` would discard that text as well. Non-blank tail
    text is therefore re-attached to the closest surviving preceding sibling,
    or to the parent's ``text`` when there is none. Whitespace-only tails are
    dropped so that indentation does not pile up as blank lines.

    Args:
        element: The ``xml.etree.ElementTree.Element`` to clean.
        tag: Tag name to delete; compared for equality with ``child.tag``.

    Returns:
        None.
    """
    # An explicit stack replaces recursion so that deeply nested documents
    # cannot exceed the interpreter's recursion limit.
    pending = [element]
    while pending:
        parent = pending.pop()
        kept = None  # last surviving child seen so far under this parent
        # Iterate over a snapshot: removing from the live child list while
        # looping over it would skip siblings.
        for child in list(parent):
            if child.tag != tag:
                pending.append(child)
                kept = child
                continue
            trailing = child.tail
            if trailing and trailing.strip():
                if kept is None:
                    parent.text = (parent.text or "") + trailing
                else:
                    kept.tail = (kept.tail or "") + trailing
            parent.remove(child)
|
|||
|
|
|||
|
def remove_tag_from_xml(xml_file, tag_to_remove, output_file):
    """Strip one tag from a single XML file and save the result.

    The file is parsed, every element named ``tag_to_remove`` below the root
    is deleted, and the tree is written to ``output_file`` as UTF-8 with an
    XML declaration. Nothing is written when the root element itself carries
    the tag; a warning is printed instead.

    Any exception raised along the way is caught and reported with ``print``,
    so the function always returns ``None`` and never raises for a bad file.

    Args:
        xml_file: Path of the XML file to read.
        tag_to_remove: Name of the tag to delete.
        output_file: Path the cleaned document is written to.
    """
    try:
        document = ET.parse(xml_file)
        top = document.getroot()
        if top.tag != tag_to_remove:
            remove_tag_from_element(top, tag_to_remove)
            document.write(output_file, encoding="utf-8", xml_declaration=True)
            print(f"处理完成: {xml_file} -> {output_file}")
        else:
            # The root itself is the tag to delete: warn and skip this file.
            print(f"警告:{xml_file} 的根标签就是要删除的标签({tag_to_remove}),跳过该文件。")
    except Exception as err:
        print(f"处理 {xml_file} 时出错: {err}")
|
|||
|
|
|||
|
def batch_remove_tag(input_dir, output_dir, tag_to_remove):
    """Remove a tag from every XML file directly inside ``input_dir``.

    Each directory entry whose name ends with ``.xml`` (case-insensitive) is
    handed to ``remove_tag_from_xml`` together with an output path of the
    same file name inside ``output_dir``. Sub-directories are not descended
    into.

    Args:
        input_dir: Directory that is listed for XML files.
        output_dir: Directory receiving the results; created, including
            missing parents, when it does not exist yet.
        tag_to_remove: Tag name forwarded to ``remove_tag_from_xml``.

    Raises:
        OSError: If ``input_dir`` cannot be listed or ``output_dir`` cannot
            be created (for example because a regular file has that name).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # processing order and the progress output reproducible between runs.
    for name in sorted(os.listdir(input_dir)):
        if not name.lower().endswith(".xml"):
            continue
        remove_tag_from_xml(
            os.path.join(input_dir, name),
            tag_to_remove,
            os.path.join(output_dir, name),
        )
|
|||
|
|
|||
|
if __name__ == "__main__":
    # Imported here because argparse is only needed when run as a script.
    import argparse

    # All three arguments are optional. When one is omitted, the value that
    # used to be hard-coded here is used, so running the script with no
    # arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Remove a tag from every XML file in a directory."
    )
    parser.add_argument(
        "input_directory",
        nargs="?",
        default=r"C:\Users\10561\Downloads\Compressed\xml",
        help="directory containing the XML files to process",
    )
    parser.add_argument(
        "output_directory",
        nargs="?",
        default="path_to_output_xml_directory",
        help="directory where the processed XML files are saved",
    )
    parser.add_argument(
        "tag_to_delete",
        nargs="?",
        default="segmented",
        help="name of the tag to delete, e.g. 'segmented' for <segmented>",
    )
    args = parser.parse_args()

    batch_remove_tag(args.input_directory, args.output_directory, args.tag_to_delete)
|