-
-
-
-
-
-
-
-
import getopt
import os
import sys
-
-
def read_file(path):
    """Return every line of the text file at *path* as a list of strings.

    Line terminators are kept on each returned line. The file is opened
    inside a ``with`` block so the handle is closed even if reading fails.
    """
    with open(path, "r") as handle:
        return handle.readlines()
-
-
def one_line_proc(parts, total, ft_map, outsp, empty_fill):
    """Build one output record from the fields of one input record.

    Args:
        parts: Fields of the input line, as a list of strings.
        total: Number of fields the output record must contain.
        ft_map: Maps a 1-based output field number to a fill spec (str).
            A spec starting with ``"d"`` emits the rest of the spec as a
            constant; any other spec is parsed as a 1-based index into
            ``parts``.
        outsp: Separator placed between output fields.
        empty_fill: Text used for output fields that have no entry in
            ``ft_map``, and for mapped source fields that the input line
            does not contain.

    Returns:
        The output record as a single string, without a line terminator.
    """
    fields = []
    for position in range(1, total + 1):
        spec = ft_map.get(position)
        if spec is None:
            fields.append(empty_fill)
        elif spec.startswith("d"):
            # Constant value: everything after the "d" marker.
            fields.append(spec[1:])
        else:
            try:
                fields.append(parts[int(spec) - 1])
            except IndexError:
                # The input line is shorter than the mapping expects;
                # fall back to the filler instead of aborting the run.
                fields.append(empty_fill)
    return outsp.join(fields)
-
-
-
def _parse_field_spec(spec):
    """Classify one target-field spec into ``(kind, target, payload)``.

    * ``"N=text"`` -> ``("const", N, "text")``: field N gets a constant.
    * ``"N:M"``    -> ``("source", N, "M")``: field N gets source field M.
    * ``"N"``      -> ``("auto", N, None)``: field N gets the next unused
      source field.

    The ``=`` form is tested first so that a constant containing a colon
    (e.g. ``"3=a:b"``) is not mistaken for a source mapping.
    """
    spec = str(spec)
    if r"\=" not in spec:
        pieces = spec.split("=")
        if len(pieces) == 2:
            return "const", int(pieces[0]), pieces[1]
    if r"\:" not in spec:
        pieces = spec.split(":")
        if len(pieces) == 2:
            return "source", int(pieces[0]), pieces[1]
    return "auto", int(spec), None


def process(inpath, total, to, outpath, insp="\t", outsp="\t", empty_fill=""):
    """Convert the delimited file *inpath* into the target layout at *outpath*.

    Args:
        inpath: Path of the input file.
        total: Number of fields in every output record.
        to: List of target-field specs (strings): ``"N"``, ``"N:M"`` or
            ``"N=text"`` (see ``_parse_field_spec``).
        outpath: Path of the output file.
        insp: Field separator of the input file.
        outsp: Field separator of the output file.
        empty_fill: Text written into output fields that no spec assigns.

    Input lines with fewer fields than the number of non-constant specs
    are skipped. The output file is only opened once every record has been
    built, so a failure while converting does not truncate it.

    Raises:
        ValueError: If a field number in ``to`` is not an integer.
    """
    specs = [_parse_field_spec(row) for row in to]

    # Source indexes claimed explicitly are reserved up front so that the
    # automatic assignment below never hands them out a second time.
    used = set(int(payload) for kind, _, payload in specs if kind == "source")
    # Every non-constant spec consumes one input field.
    in_count = sum(1 for kind, _, _ in specs if kind != "const")

    ft_map = {}
    next_free = 1
    for kind, target, payload in specs:
        if kind == "const":
            ft_map[target] = "d" + payload
        elif kind == "source":
            ft_map[target] = payload
        else:
            # Smallest source index not used yet; no fixed upper bound.
            while next_free in used:
                next_free += 1
            ft_map[target] = str(next_free)
            used.add(next_free)

    result = []
    for line in read_file(inpath):
        parts = line.strip("\n").split(insp)
        if len(parts) >= in_count:
            result.append(
                one_line_proc(parts, total, ft_map, outsp, empty_fill) + "\n"
            )

    with open(outpath, "w") as out:
        out.writelines(result)
-
-
def help_msg():
    """Print the command-line usage text to stdout and exit with status 0.

    The text documents every option accepted by the option parser,
    including ``-f`` (filler for unassigned fields, empty by default) and
    ``-h``.
    """
    usage_lines = (
        "功能:原数据文件转为目标数据格式",
        "选项:",
        "\t -i inputfilepath [必输,原文件路径]",
        "\t -t n [必输,n为数字,目标数据总的域个数]",
        "\t -a '1,3,4' [必输,域编号字符串,逗号分隔。指定域用原数据字段填充,未指定的域用 -f 指定的值填充]",
        "\t -o outputfilepath [可选,默认为 inputfilepath.dist ]",
        "\t -F 'FS' [可选,原文件域分隔符,默认为\\t ]",
        "\t -P 'OFS' [可选,输出文件的域分隔符,默认为\\t ]",
        "\t -f 'FILL' [可选,未指定域的填充值,默认为空字符串 ]",
        "\t -h [显示本帮助信息]",
    )
    print("\n".join(usage_lines))
    sys.exit(0)
-
-
def _unescape_separator(value):
    """Turn backslash escapes typed on the command line (such as a literal
    backslash followed by ``t``) into the characters they denote."""
    try:
        # Python 2 byte strings provide the "string_escape" codec directly.
        return value.decode("string_escape")
    except AttributeError:
        # Python 3 str has no .decode(); go through bytes. backslashreplace
        # keeps characters outside Latin-1 intact across the round trip.
        return value.encode("latin-1", "backslashreplace").decode("unicode_escape")


def main():
    """Parse the command line and run the conversion.

    Options: ``-i`` input path, ``-t`` total output fields, ``-a``
    comma-separated field specs (all three required), ``-o`` output path
    (default ``<input>.dist``), ``-F`` / ``-P`` input / output separators
    (backslash escapes are interpreted), ``-f`` filler for unassigned
    fields, ``-h`` help.

    Prints a message and exits with status 1 on invalid or missing
    options, a non-integer ``-t`` value, or a missing input file.
    """
    prog = sys.argv[0]

    try:
        opts, _args = getopt.getopt(sys.argv[1:], "F:P:t:a:i:o:f:h")
    except getopt.GetoptError:
        print(prog+" : params are not defined well!")
        # Stop here: continuing would only report a misleading
        # "missing -i" error for what is really an unknown option.
        sys.exit(1)

    inpath = outpath = total = to = None
    empty_fill = ''
    separators = {}  # only the separators given explicitly are passed on

    for op, value in opts:
        if op in ("-h", "-H", "--help"):
            help_msg()
        if op == "-i":
            inpath = value
        elif op == "-o":
            outpath = value
        elif op == "-t":
            try:
                total = int(value)
            except ValueError:
                print(prog+" : -t param must be an integer!")
                sys.exit(1)
        elif op == "-a":
            to = value.split(",")
        elif op == "-F":
            separators["insp"] = _unescape_separator(value)
        elif op == "-P":
            separators["outsp"] = _unescape_separator(value)
        elif op == "-f":
            empty_fill = value

    if len(opts) < 3:
        print(prog+" : the amount of params must great equal than 3")
        sys.exit(1)

    if inpath is None:
        print(prog+" : -i param is needed,input file path must define!")
        sys.exit(1)

    if total is None:
        print(prog+" : -t param is needed,the fields of result file must define!")
        sys.exit(1)

    if to is None:
        print(prog+" : -a param is needed,must assign the field to put !")
        sys.exit(1)

    if not os.path.exists(inpath):
        print(prog+" file : %s is not exists"%inpath)
        sys.exit(1)

    if outpath is None:
        outpath = inpath+".dist"

    process(inpath, total, to, outpath, empty_fill=empty_fill, **separators)
-
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# Web page residue (not part of the script): "评论暂时关闭" (comments are temporarily closed).