试一下今天发布的claude-3-7-sonnet-20250219。
官方测评能力图如下:

注意
代码如下,得考虑很多情况,包括全半角符号转换,清洗数据,模糊匹配村级,字典查找,再进行补全。
注:
默认地址为甘肃省陇南市武都区,如使用请更改相关字段。
# python
import re
import unicodedata
# 行政区域数据可自由引入。
# 清洗地址函数
def clean_address(address):
    """Normalize a raw address string before matching.

    Steps, in order:
      1. NFKC-normalize, which folds full-width letters, digits and
         punctuation to their half-width equivalents.
      2. Drop every character that is not a CJK ideograph in the range
         U+4E00-U+9FA5, an ASCII letter, an ASCII digit, or whitespace.
      3. Collapse each run of whitespace into a single space.
      4. Strip leading and trailing whitespace.

    Args:
        address: The raw address text.

    Returns:
        The cleaned address string (possibly empty).
    """
    # Full-width -> half-width (e.g. "123" -> "123", "," -> ",").
    address = unicodedata.normalize('NFKC', address)
    # The escapes must be real regex escapes (\uXXXX, \s); without the
    # backslashes the class would match literal letters and strip all Chinese text.
    address = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', '', address)
    # Merge consecutive whitespace (spaces, tabs, newlines) into one space.
    address = re.sub(r'\s+', ' ', address)
    return address.strip()
# 补全地址函数
def complete_address(address, area_data, *, province="甘肃省", city="陇南市", district="武都区"):
    """Complete a cleaned address with province/city/district/town/village.

    Matching strategy, in order:
      1. Empty input returns "".
      2. An address already starting with ``province`` is returned as is;
         one starting with ``city`` only gets ``province`` prepended.
      3. Any address containing the literal "磨坝乡" is cut at that point and
         prefixed with the region (data-specific special case).
      4. "中腰村" is rewritten to "中腰里村" (data-specific alias).
      5. Town names are searched longest first. Inside a matched town, its
         villages are searched longest first; text before the town is
         dropped and text after the village is kept.
      6. With no town found, villages of every town are searched, both by
         full name and by the name without its trailing "村". The earliest
         occurrence wins; on equal positions the longest matched text wins,
         so a full name beats a truncated prefix of another village.
      7. Otherwise the address is returned unchanged.

    Args:
        address: Address text, expected to be already cleaned.
        area_data: Mapping of town name -> iterable of village names.
        province: Province name used for the prefix and the prefix checks.
        city: City name used for the prefix and the prefix checks.
        district: District name used for the prefix.

    Returns:
        The completed address, or the input unchanged when nothing matches.
    """
    if not address:
        return ""

    region = f"{province}{city}{district}"

    # Addresses that already carry the province or city are trusted as written.
    if address.startswith(province):
        return address
    if address.startswith(city):
        return province + address

    # Special case: keep everything from "磨坝乡" onward, drop what precedes it.
    # (Covers both "starts with" and "contains".)
    marker = address.find("磨坝乡")
    if marker != -1:
        return region + address[marker:]

    # Alias: "中腰村" is recorded as "中腰里村" in the area data.
    address = address.replace("中腰村", "中腰里村")

    # Pass 1: town present in the address. Longest town names first so a
    # longer name is not shadowed by a shorter one contained in it.
    for town in sorted(area_data, key=len, reverse=True):
        town_index = address.find(town)
        if town_index == -1:
            continue
        # Longest village first (then by name) keeps the result deterministic
        # even when the villages are stored in a set.
        for village in sorted(area_data[town], key=lambda v: (-len(v), v)):
            if village not in address:
                continue
            adjacent = address.find(town + village)
            if adjacent != -1:
                # Town immediately followed by village: keep the tail verbatim.
                return region + address[adjacent:]
            # Town and village are separated by other text; accept only if
            # the town comes first, and drop the text between them.
            village_index = address.find(village)
            if town_index < village_index:
                tail = address[village_index + len(village):]
                return f"{region}{town}{village}{tail}"
        # Town found but no usable village: keep everything after the town.
        return region + address[town_index:]

    # Pass 2: no town in the address; look for a village of any town.
    candidates = []
    for town, villages in area_data.items():
        for village in sorted(villages):
            if village in address:
                matched = village
            elif (village.endswith("村") and len(village) > 1
                    and village[:-1] in address):
                # Village written without its trailing "村".
                matched = village[:-1]
            else:
                continue
            position = address.find(matched)
            tail = address[position + len(matched):]
            candidates.append((position, -len(matched), town, village, tail))

    if candidates:
        # Earliest position first; on ties prefer the longest matched text.
        # min() keeps the first candidate among full ties (town order of area_data).
        _, _, town, village, tail = min(candidates, key=lambda c: (c[0], c[1]))
        return f"{region}{town}{village}{tail}"

    # Nothing matched: hand back the cleaned address untouched.
    return address
# 主程序
if __name__ == "__main__":
    # Script entry point: clean and complete every line of the input file
    # and write one result per line to the output file.
    # NOTE: the module-level name `area_data` must already be defined when
    # this block runs.
    import sys

    # Optional CLI: python 1.py [input_file [output_file]]
    # Without arguments the original default file names are used.
    input_file = sys.argv[1] if len(sys.argv) > 1 else "input.txt"
    output_file = sys.argv[2] if len(sys.argv) > 2 else "output.txt"

    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:
        for line in f_in:
            address = line.strip()  # drop the trailing newline and outer blanks
            cleaned_address = clean_address(address)
            completed_address = complete_address(cleaned_address, area_data)
            # Terminate each record with a real newline ('\n', not the letter 'n').
            f_out.write(completed_address + '\n')
对于行政区域数据,可以直接写在程序里,以供查找。注意:area_data 必须定义在主程序(if __name__ == "__main__":)之前,否则运行时会出现 NameError。
这里示例武都区36个乡镇的部分村级行政区属规划。
# 行政区域数据
# Administrative lookup table: town name -> set of village-level names
# belonging to that town (sample data, not necessarily complete).
area_data = {
    "安化镇": {
        "上阴坡村", "东坪村", "包家沟村", "双庙村", "周塄干村", "大坪村", "大鹿院村",
        "寺林山村", "小峪寺村", "小湾村", "崖羊泉村", "恶家湾村", "文家沟村", "斜山村",
        "朱坪村", "李家庙村", "杜家塄村", "杜家湾村", "查家湾村", "甘树湾村", "石大坪村",
        "符家塄村", "罗家垭村", "翟家湾村", "许家塄村", "谈家坪村", "谷家坡村", "赵家山村",
        "铺底下村", "青崖村", "驮子湾村", "黑虎崖村",
    },
    "柏林镇": {
        "上渠道村", "下渠道村", "五角坪村", "大庄头村", "大社科村", "指甲山村", "李家山村",
        "杨地湾村", "杨庄村", "梨树湾村", "浩家沟村", "牙头村", "王家山村", "田家沟村",
        "石桥村", "罗湾村", "腰坡村", "袁家坝村", "袁家塄村", "赵家湾村",
    },
    "城关镇": {
        "店沟村", "新村社区居委会", "清水沟社区居委会", "王家庄村", "石家庄村", "黑坝村",
    },
    "池坝乡": {
        "九池村", "孙家磨村", "孟家庄村", "小河村", "新庄村", "池坝村", "红土道村",
    },
    "枫相乡": {
        "东沟村", "亚滩村", "大水沟村", "尹家河村", "崖湾村", "张家院村", "强家湾村",
        "李家村", "松咀儿村", "枫相院村", "磨坝村", "草坪村", "麻地湾村", "麻柳村",
    },
    "佛崖镇": {
        "佛崖村", "侯儿坝村", "叶坝村", "孟家村", "村委村", "柏树坝村", "民委村",
        "湾儿村", "烟火台村", "熊池坝村", "燕崖村", "牛家湾村", "王沟村", "贾店村", "韩家湾村",
    },
    "甘泉镇": {
        "何家村", "侯山村", "双沟村", "张安村", "成沟村", "旗杆村", "李河村", "杨庙村",
        "樊坝村", "渠子村", "玉沟村", "甘泉街村", "童庄村", "董沟村", "赵坪村", "龙湾村",
    },
    "郭河乡": {
        "下成村", "传子山村", "候家湾村", "八海村", "寺山村", "斗子坪村", "旧面村",
        "柏桃村", "玉头村", "王董村", "符家山村", "绿化村", "营寨村", "赤洛村", "郭河村", "马儿沟村",
    },
    "汉林镇": {
        "三家地村", "下李村", "周家山村", "姚家村", "杜家山村", "杜家湾村", "潘家山村",
        "红土村", "西山地村", "麦坪头村",
    },
    "汉王镇": {
        "包家坝村", "土桥村", "大坪山村", "成家山村", "朱能村", "杨庞村", "汉坪村",
        "甘家山村", "甘家沟村", "白崖村", "贾半山村", "问子山村", "陈家坡村", "陈李家村",
        "陈龙村", "马仓村", "马半山村",
    },
    "黄坪镇": {
        "崔家村", "成坝村", "杨务沟村", "苟山村", "蒋芦村", "蒿川村", "钟李村", "阳坡村", "黄坪村",
    },
    "角弓镇": {"下候子村", "半山村", "白草沟村", "白鹤桥村"},
    "桔柑镇": {"东村村", "大元坝村", "曹家湾村", "朱元山村", "贺家坪村"},
    "两水镇": {
        "后坝村", "土门垭村", "寨子村", "庙坪村", "朱家山村", "杜家沟村", "清水坪村",
        "烟墩沟村", "谢家坡村", "马入崖村", "马尾巴村", "黄栌坝村", "龙王山村",
    },
    "龙坝乡": {
        "上寨村", "天池村", "庄窠村", "张庄村", "田河村", "白果村", "红石村",
        "铁山村", "鱼关村", "麻滩村",
    },
    "龙凤乡": {
        "上祁家村", "井头山村", "候家山村", "大阳山村", "寺塄干村", "寺背村", "岸家山村",
        "李家湾村", "杜家山村", "杨上面村", "杨坪村", "毕家山村", "瓦舌头村", "祁家塄村",
        "者家湾村", "艾蒿坪村", "草舌坪村", "袁家那村", "郭阳坡村",
    },
    "隆兴镇": {
        "倒流水村", "包峪寺村", "化马坪套村", "化马湾里村", "四方里村", "对合子村",
        "扬家沟村", "木林沟村", "玉家村", "王家坝村", "符家湾村", "苜蓿潭村", "蛇崖寺村",
        "谈家坝村", "集昌坝村", "马家沟村",
    },
    "洛塘镇": {
        "八房村", "兴华村", "北雀沟村", "和平村", "塄头坪村", "李家山村", "李家沟村",
        "杨家场村", "杨家庄村", "沟底下村", "褚家山村", "豆家阳山村", "郭家坡村", "青崖沟村",
        "香树坝村", "马家沟村", "麻地沟村",
    },
    "马街镇": {
        "上板桥村", "何上面村", "刘家山村", "半山村", "卯安子村", "大板村", "姜家山村",
        "安坪村", "官化村", "小庄头村", "小洞沟村", "尹家湾村", "张阴山村", "杨湾村",
        "柳家坡村", "梨坪村", "樊家山村", "沙坪村", "泉家湾村", "牙头村", "牙里村",
        "王山村", "石坪村", "石塄坎村", "菜阳沟村", "赵坪村", "路家那村", "郭能干村",
        "马槽沟村", "高桥村",
    },
    "马营镇": {
        "上沟村", "东峪村", "乱石村", "小草湾村", "小金厂村", "巩坪村", "庙儿沟村",
        "庞磨村", "张家坪村", "强家沟村", "松坪村", "梁家塄坎村", "水泉村", "渭子坪村",
        "碌坪村", "碌碡坝村", "贾家阴坡村", "赤化村", "阳坡村", "陈家塄坎村", "龙沟村",
    },
    "磨坝藏族乡": {
        "东岳山村", "中腰里村", "唐家村", "小板石村", "曹家湾村", "潘家湾村",
        "磨坝里村", "竹园子村",
    },
    "琵琶镇": {
        "下高家村", "勿驮村", "唐坝村", "小川坝村", "小河村", "张坝村", "楼低村",
        "武家山村", "毛坡村", "毛家沟村", "水磨村", "玄湾村", "王家上沟村", "王家山村",
        "琵琶街村", "瓦房坝村", "秋嘴村", "胡家沟村", "谈坝村", "高家坝村", "鸡公眼村",
        "麻崖村", "龙潭村",
    },
    "坪娅藏族乡": {"铧咀村"},
    "蒲池乡": {
        "上巩家村", "下坝村", "下巩家村", "咀台上村", "土桥山村", "坪儿上村", "小乔沟村",
        "尚家山村", "张庄村", "木竹年村", "杜塄村", "杨家边村", "汪家坝村", "湾里村",
        "王家坪村", "珍嘴村", "石塄岗村", "陈家山村", "马家年村", "麻湾村",
    },
    "三仓镇": {
        "何家坝村", "坪头村", "大石村", "安家山村", "寺沟村", "成坝村", "李家坝村",
        "核桃庄村", "水沟村", "沟口坝村", "玉才村", "罗家山村", "罗车寺村", "闹院村",
    },
    "三河镇": {
        "南山村", "宣河村", "小石村", "张半山村", "竹林村", "苍院村", "阳山村", "马河村", "黑沟村",
    },
    "石门镇": {"上沟村", "下坪村", "宣麻村", "石门街村", "草坝子村"},
    "外纳镇": {
        "关方村", "周家山村", "外纳村", "安宁村", "宗家坝村", "崖角村", "张河坝村",
        "曹家坝村", "板仓村", "桃树坪村", "椒元村", "沟渠村", "甘山村", "稻畦村",
        "艾下村", "透防村", "锦坪村",
    },
    "五库镇": {
        "上坝村", "下坝村", "佛殿村", "党家村", "土地沟村", "安家坝村", "年家沟村",
        "张坝村", "沙坝村", "王坝村", "能干村", "草山村", "蒋家山村", "袁坝村",
        "靳家山村", "魏坝村",
    },
    "五马镇": {
        "何家沟村", "李家坝村", "河口村", "石家坝村", "西山村", "金口坝村", "马坝村",
    },
    "下黄家坝社区": {"下黄家坝社区"},
    "姚寨镇": {"何家崖村", "四合村", "姚寨村", "曹家堡村", "长凌村"},
    "鱼龙镇": {
        "仓头山村", "刘家湾村", "坪套里村", "孟家山村", "小张家村", "庞家山村",
        "林里村", "柏家沟村", "许家湾村", "阴湾里村", "韩家山村", "麻家湾村",
    },
    "玉皇乡": {
        "八石沟村", "坭池子村", "大坪村", "大山村", "安窠村", "小石村", "张底下村",
        "杜家村", "杨家湾村", "槐树山村", "玉皇坪村", "罗家里村", "羊圈头村", "老庄村",
        "院子村", "马家山村",
    },
    "裕河镇": {
        "余家河村", "坟坪子村", "孙家湾村", "庙坝村", "梨树村", "范家坪村", "赵钱坝村", "阳坝村",
    },
    "月照乡": {
        "三流水村", "专木子村", "六房沟村", "尹家坝村", "草地子村", "赵坝村", "马塄村",
    },
}
使用:将上述代码保存为 1.py,在 Python 3.6 或更高版本下运行 python 1.py 即可。
输入、输出文件名默认为 input.txt 和 output.txt,可在主程序中修改。