试一下今天发布的claude-3-7-sonnet-20250219。

官方测评能力图如下:

注意

代码如下。需要考虑很多情况,包括全角/半角符号转换、数据清洗、村级名称的模糊匹配和字典查找,最后再进行地址补全。

注:

默认地址为甘肃省陇南市武都区,如使用请更改相关字段。

# python
import re
import unicodedata

# 行政区域数据可自由引入。

# 清洗地址函数
def clean_address(address):
    """Normalize a raw address string and strip unwanted characters.

    Processing steps, in order:

    1. NFKC normalization, which folds full-width letters, digits,
       punctuation and the ideographic space into their half-width forms.
    2. Removal of every character that is not a CJK ideograph in the
       U+4E00..U+9FA5 range, an ASCII letter, a digit, or whitespace.
    3. Collapsing of each whitespace run into a single space.
    4. Trimming of leading and trailing whitespace.

    Args:
        address: Raw address text. ``None`` or an empty string is accepted.

    Returns:
        The cleaned address, or an empty string when nothing is left.
    """
    if not address:
        return ""
    # Fold full-width characters to half-width before filtering, so that
    # full-width digits/letters survive the whitelist below.
    address = unicodedata.normalize('NFKC', address)
    # The escapes (\u4e00, \u9fa5, \s) are essential: without the backslashes
    # the class would match literal letters and drop all Chinese characters.
    address = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', '', address)
    # Collapse any whitespace run (spaces, tabs, ...) into one space.
    address = re.sub(r'\s+', ' ', address)
    return address.strip()

# 补全地址函数
def complete_address(address, area_data):
    """Complete a cleaned address with its province/city/district/town prefix.

    The rules are applied in this order and the first one that matches wins:

    1. An empty address yields an empty string.
    2. An address starting with "甘肃省" is returned unchanged; one starting
       with "陇南市" only gets "甘肃省" prepended.
    3. An address containing "磨坝乡" is cut at the first occurrence of that
       name (anything before it is discarded) and prefixed with the region.
    4. "中腰村" is rewritten to "中腰里村" before any lookup.
    5. Town lookup: the longest town name from ``area_data`` found in the
       address is used. Villages of that town found in the address are tried
       in order of appearance; a village directly following the town, or
       appearing somewhere after it, is kept together with the text that
       follows it. If no village qualifies, the text after the town is kept.
    6. Village lookup: with no town present, every village (or its name
       without the trailing "村") is searched for. The earliest match wins;
       on equal position the longer matched text wins, so a full village
       name is preferred over a shorter stem of another village.
    7. Otherwise the address is returned unchanged.

    Args:
        address: Address text, expected to be already cleaned.
        area_data: Mapping of town name to an iterable of village names.

    Returns:
        The completed address, or ``address`` itself when nothing matched.
    """
    region_prefix = "甘肃省陇南市武都区"

    if not address:
        return ""

    # Already (partly) qualified addresses need little or no work.
    if address.startswith("甘肃省"):
        return address
    if address.startswith("陇南市"):
        return "甘肃省" + address

    # Special case: keep everything from the first "磨坝乡" onwards; this
    # covers both "starts with" and "contains" in one step.
    special_index = address.find("磨坝乡")
    if special_index != -1:
        return f"{region_prefix}{address[special_index:]}"

    # Special case: map the colloquial village name to the official one.
    if "中腰村" in address:
        address = address.replace("中腰村", "中腰里村")

    # Town lookup, longest names first so a longer town name is not
    # shadowed by a shorter one contained in it.
    for town in sorted(area_data, key=len, reverse=True):
        town_index = address.find(town)
        if town_index == -1:
            continue

        # Order candidate villages by where they occur in the address
        # (longer name first on a tie) so the outcome does not depend on
        # the iteration order of the village container.
        candidates = sorted(
            (name for name in area_data[town] if name in address),
            key=lambda name: (address.find(name), -len(name)),
        )
        for village in candidates:
            town_village = town + village
            pair_index = address.find(town_village)
            if pair_index != -1:
                # Town and village are adjacent.
                extra_info = address[pair_index + len(town_village):]
                return f"{region_prefix}{town_village}{extra_info}"
            # Not adjacent: accept only a village that follows the town.
            village_index = address.find(village)
            if town_index < village_index:
                extra_info = address[village_index + len(village):]
                return f"{region_prefix}{town}{village}{extra_info}"

        # Only the town was found.
        extra_info = address[town_index + len(town):]
        return f"{region_prefix}{town}{extra_info}"

    # Village lookup across all towns (full name, or name without "村").
    village_matches = []
    for town_order, (town, villages) in enumerate(area_data.items()):
        for village in villages:
            if village in address:
                matched = village
            elif (village.endswith("村") and len(village) > 1
                    and village[:-1] in address):
                matched = village[:-1]
            else:
                continue
            index = address.find(matched)
            extra_info = address[index + len(matched):]
            # Rank: earliest position, then longest matched text, then the
            # town's order in area_data, then the village name.
            rank = (index, -len(matched), town_order, village)
            village_matches.append((rank, town, village, extra_info))

    if village_matches:
        _, town, village, extra_info = min(
            village_matches, key=lambda match: match[0]
        )
        return f"{region_prefix}{town}{village}{extra_info}"

    # Nothing matched: hand back the address as received.
    return address

# 主程序
if __name__ == "__main__":
    # Script entry point: read addresses line by line, clean and complete
    # each one, and write one result per line.
    # NOTE: `area_data` is not defined in this block; it must be defined at
    # module level before this block runs.
    import sys  # local import: only the script entry point needs argv

    # Optional command-line overrides: python <script> [input] [output].
    # Without arguments the original default file names are used.
    input_file = sys.argv[1] if len(sys.argv) > 1 else "input.txt"
    output_file = sys.argv[2] if len(sys.argv) > 2 else "output.txt"

    with open(input_file, 'r', encoding='utf-8') as f_in, \
            open(output_file, 'w', encoding='utf-8') as f_out:
        for line in f_in:
            address = line.strip()  # drop the trailing newline and padding
            cleaned_address = clean_address(address)
            completed_address = complete_address(cleaned_address, area_data)
            # Terminate every record with a real newline ('\n', not 'n').
            f_out.write(completed_address + '\n')

对于行政区域数据,可以写在程序里,以供查找。

这里以武都区 36 个乡镇的部分村级行政区划数据为例。

# 行政区域数据
# Lookup table used for address completion: each key is a town-level name and
# each value is the set of village-level names listed under it.
area_data = {
    "安化镇": {
        "上阴坡村", "东坪村", "包家沟村", "双庙村", "周塄干村", "大坪村",
        "大鹿院村", "寺林山村", "小峪寺村", "小湾村", "崖羊泉村", "恶家湾村",
        "文家沟村", "斜山村", "朱坪村", "李家庙村", "杜家塄村", "杜家湾村",
        "查家湾村", "甘树湾村", "石大坪村", "符家塄村", "罗家垭村", "翟家湾村",
        "许家塄村", "谈家坪村", "谷家坡村", "赵家山村", "铺底下村", "青崖村",
        "驮子湾村", "黑虎崖村",
    },
    "柏林镇": {
        "上渠道村", "下渠道村", "五角坪村", "大庄头村", "大社科村", "指甲山村",
        "李家山村", "杨地湾村", "杨庄村", "梨树湾村", "浩家沟村", "牙头村",
        "王家山村", "田家沟村", "石桥村", "罗湾村", "腰坡村", "袁家坝村",
        "袁家塄村", "赵家湾村",
    },
    "城关镇": {
        "店沟村", "新村社区居委会", "清水沟社区居委会", "王家庄村", "石家庄村",
        "黑坝村",
    },
    "池坝乡": {
        "九池村", "孙家磨村", "孟家庄村", "小河村", "新庄村", "池坝村",
        "红土道村",
    },
    "枫相乡": {
        "东沟村", "亚滩村", "大水沟村", "尹家河村", "崖湾村", "张家院村",
        "强家湾村", "李家村", "松咀儿村", "枫相院村", "磨坝村", "草坪村",
        "麻地湾村", "麻柳村",
    },
    "佛崖镇": {
        "佛崖村", "侯儿坝村", "叶坝村", "孟家村", "村委村", "柏树坝村",
        "民委村", "湾儿村", "烟火台村", "熊池坝村", "燕崖村", "牛家湾村",
        "王沟村", "贾店村", "韩家湾村",
    },
    "甘泉镇": {
        "何家村", "侯山村", "双沟村", "张安村", "成沟村", "旗杆村",
        "李河村", "杨庙村", "樊坝村", "渠子村", "玉沟村", "甘泉街村",
        "童庄村", "董沟村", "赵坪村", "龙湾村",
    },
    "郭河乡": {
        "下成村", "传子山村", "候家湾村", "八海村", "寺山村", "斗子坪村",
        "旧面村", "柏桃村", "玉头村", "王董村", "符家山村", "绿化村",
        "营寨村", "赤洛村", "郭河村", "马儿沟村",
    },
    "汉林镇": {
        "三家地村", "下李村", "周家山村", "姚家村", "杜家山村", "杜家湾村",
        "潘家山村", "红土村", "西山地村", "麦坪头村",
    },
    "汉王镇": {
        "包家坝村", "土桥村", "大坪山村", "成家山村", "朱能村", "杨庞村",
        "汉坪村", "甘家山村", "甘家沟村", "白崖村", "贾半山村", "问子山村",
        "陈家坡村", "陈李家村", "陈龙村", "马仓村", "马半山村",
    },
    "黄坪镇": {
        "崔家村", "成坝村", "杨务沟村", "苟山村", "蒋芦村", "蒿川村",
        "钟李村", "阳坡村", "黄坪村",
    },
    "角弓镇": {"下候子村", "半山村", "白草沟村", "白鹤桥村"},
    "桔柑镇": {"东村村", "大元坝村", "曹家湾村", "朱元山村", "贺家坪村"},
    "两水镇": {
        "后坝村", "土门垭村", "寨子村", "庙坪村", "朱家山村", "杜家沟村",
        "清水坪村", "烟墩沟村", "谢家坡村", "马入崖村", "马尾巴村", "黄栌坝村",
        "龙王山村",
    },
    "龙坝乡": {
        "上寨村", "天池村", "庄窠村", "张庄村", "田河村", "白果村",
        "红石村", "铁山村", "鱼关村", "麻滩村",
    },
    "龙凤乡": {
        "上祁家村", "井头山村", "候家山村", "大阳山村", "寺塄干村", "寺背村",
        "岸家山村", "李家湾村", "杜家山村", "杨上面村", "杨坪村", "毕家山村",
        "瓦舌头村", "祁家塄村", "者家湾村", "艾蒿坪村", "草舌坪村", "袁家那村",
        "郭阳坡村",
    },
    "隆兴镇": {
        "倒流水村", "包峪寺村", "化马坪套村", "化马湾里村", "四方里村",
        "对合子村", "扬家沟村", "木林沟村", "玉家村", "王家坝村", "符家湾村",
        "苜蓿潭村", "蛇崖寺村", "谈家坝村", "集昌坝村", "马家沟村",
    },
    "洛塘镇": {
        "八房村", "兴华村", "北雀沟村", "和平村", "塄头坪村", "李家山村",
        "李家沟村", "杨家场村", "杨家庄村", "沟底下村", "褚家山村", "豆家阳山村",
        "郭家坡村", "青崖沟村", "香树坝村", "马家沟村", "麻地沟村",
    },
    "马街镇": {
        "上板桥村", "何上面村", "刘家山村", "半山村", "卯安子村", "大板村",
        "姜家山村", "安坪村", "官化村", "小庄头村", "小洞沟村", "尹家湾村",
        "张阴山村", "杨湾村", "柳家坡村", "梨坪村", "樊家山村", "沙坪村",
        "泉家湾村", "牙头村", "牙里村", "王山村", "石坪村", "石塄坎村",
        "菜阳沟村", "赵坪村", "路家那村", "郭能干村", "马槽沟村", "高桥村",
    },
    "马营镇": {
        "上沟村", "东峪村", "乱石村", "小草湾村", "小金厂村", "巩坪村",
        "庙儿沟村", "庞磨村", "张家坪村", "强家沟村", "松坪村", "梁家塄坎村",
        "水泉村", "渭子坪村", "碌坪村", "碌碡坝村", "贾家阴坡村", "赤化村",
        "阳坡村", "陈家塄坎村", "龙沟村",
    },
    "磨坝藏族乡": {
        "东岳山村", "中腰里村", "唐家村", "小板石村", "曹家湾村", "潘家湾村",
        "磨坝里村", "竹园子村",
    },
    "琵琶镇": {
        "下高家村", "勿驮村", "唐坝村", "小川坝村", "小河村", "张坝村",
        "楼低村", "武家山村", "毛坡村", "毛家沟村", "水磨村", "玄湾村",
        "王家上沟村", "王家山村", "琵琶街村", "瓦房坝村", "秋嘴村", "胡家沟村",
        "谈坝村", "高家坝村", "鸡公眼村", "麻崖村", "龙潭村",
    },
    "坪娅藏族乡": {"铧咀村"},
    "蒲池乡": {
        "上巩家村", "下坝村", "下巩家村", "咀台上村", "土桥山村", "坪儿上村",
        "小乔沟村", "尚家山村", "张庄村", "木竹年村", "杜塄村", "杨家边村",
        "汪家坝村", "湾里村", "王家坪村", "珍嘴村", "石塄岗村", "陈家山村",
        "马家年村", "麻湾村",
    },
    "三仓镇": {
        "何家坝村", "坪头村", "大石村", "安家山村", "寺沟村", "成坝村",
        "李家坝村", "核桃庄村", "水沟村", "沟口坝村", "玉才村", "罗家山村",
        "罗车寺村", "闹院村",
    },
    "三河镇": {
        "南山村", "宣河村", "小石村", "张半山村", "竹林村", "苍院村",
        "阳山村", "马河村", "黑沟村",
    },
    "石门镇": {"上沟村", "下坪村", "宣麻村", "石门街村", "草坝子村"},
    "外纳镇": {
        "关方村", "周家山村", "外纳村", "安宁村", "宗家坝村", "崖角村",
        "张河坝村", "曹家坝村", "板仓村", "桃树坪村", "椒元村", "沟渠村",
        "甘山村", "稻畦村", "艾下村", "透防村", "锦坪村",
    },
    "五库镇": {
        "上坝村", "下坝村", "佛殿村", "党家村", "土地沟村", "安家坝村",
        "年家沟村", "张坝村", "沙坝村", "王坝村", "能干村", "草山村",
        "蒋家山村", "袁坝村", "靳家山村", "魏坝村",
    },
    "五马镇": {
        "何家沟村", "李家坝村", "河口村", "石家坝村", "西山村", "金口坝村",
        "马坝村",
    },
    "下黄家坝社区": {"下黄家坝社区"},
    "姚寨镇": {"何家崖村", "四合村", "姚寨村", "曹家堡村", "长凌村"},
    "鱼龙镇": {
        "仓头山村", "刘家湾村", "坪套里村", "孟家山村", "小张家村", "庞家山村",
        "林里村", "柏家沟村", "许家湾村", "阴湾里村", "韩家山村", "麻家湾村",
    },
    "玉皇乡": {
        "八石沟村", "坭池子村", "大坪村", "大山村", "安窠村", "小石村",
        "张底下村", "杜家村", "杨家湾村", "槐树山村", "玉皇坪村", "罗家里村",
        "羊圈头村", "老庄村", "院子村", "马家山村",
    },
    "裕河镇": {
        "余家河村", "坟坪子村", "孙家湾村", "庙坝村", "梨树村", "范家坪村",
        "赵钱坝村", "阳坝村",
    },
    "月照乡": {
        "三流水村", "专木子村", "六房沟村", "尹家坝村", "草地子村", "赵坝村",
        "马塄村",
    },
}

使用:保存为 1.py(注意 area_data 的定义要放在主程序之前),然后在 Python 3.6 或更高版本下运行 python 1.py 即可;程序默认读取 input.txt,并把补全后的地址写入 output.txt。