Python

Python 的 `__name__`

这篇文章《The Reason Behind if __name__ == '__main__' in Python》出自 Medium，作者 Jordan Williams。 __name__ 本文通过两个例子讲解了 `if __name__ == "__main__":` 在多文件项目中的作用，主要从两个脚本来说明： current_script.py import other_script print('current_script __name__ is set to {} '.format(__name__)) other_script.py print('other_script __name__ is set to {} '.format(__name__)) 输出: # python3 current_script.py other_script __name__ is set to other_script current_script __name__ is set to __main__ # python3 other_script.py other_script __name__ is set to __main__ `__name__` 代表当前正在运行的模块/文件名称：直接运行的模块/文件该值为 `__main__`，对于导入的模块，其值为模块名。 java中的main方法 Java中每个类都可以有main方法，如下： public class Application { public static void main(String[] args) { //construct objects here } } main方法是一个静态方法，常用于对类进行单元测试。 ...

python excel文件中指定字段是否相等

工作需要检查相关excel数据信息：查询一个表格中的指定位置编号，检查多个相关表格中指定位置的编号是否与之相同。环境： windows10 python3.7 + openpyxl + xlrd 测试数据目录结构： C:. │ checklist.py │ reference.xlsx │ └─newdir ├─dir1 │ data1.xls │ └─dir2 data2.xls 文件内容： 参考表格 reference.xlsx A CODE-12345 表格1 data1.xls A CODE-12345 表格2 data2.xls A CODE-67890 代码： from openpyxl import load_workbook from openpyxl import Workbook import os import xlrd ## 获取待检查清单列表 def get_file(root_path,all_files=[]): files = os.listdir(root_path) for file in files: if not os.path.isdir(root_path + '/' + file): # 检查文件名包含"功能点"，以 "xls", "xlsx"为后缀的文件 if (file.endswith('.xls') or file.endswith('.xlsx')) \ and 'XXXX' in file: all_files.append(root_path + '/' + file) else: # 如果是目录，目录后增加'/'递归查找符合条件的文件 get_file((root_path+'/'+file),all_files) return all_files ## 获取参考文件指定位置编号 def get_request_no(root_path): files = os.listdir(root_path) for file in files: if not os.path.isdir(root_path + '/' + file): if file.endswith('.xls') or file.endswith('.xlsx') \ and 'YYYY' in file: print (file) ## only xlsx wb_in = load_workbook(file) ws_in = wb_in.active ## 查找第2行，第5列的编号 request_no = ws_in.cell(row=2, column=5) print (request_no.value + '\n') ##break return request_no.value; ## 遍历参考列表文件记录与参考文件表中需求编号不同的文件名 def check_file(files, request_no): result="" for file in files: ## 由于openpyxl, 不支持xls，这里使用的是xlrd库 wb_in = xlrd.open_workbook(file) ws_in = wb_in.sheets()[0] ## 编号是从0开始计数，第8行，第2列中的数据 file_request_no = ws_in.cell(rowx=7, colx=1) if str(file_request_no.value) != str(request_no): s = file_request_no.value + file +'\n' result += s return result def write_log(result): file=open("result.txt", "w") file.write(result) file.close() path = r'.\\ZZZZ' list=get_file(path) path = r'.' no=get_request_no(path) result=check_file(list, no) write_log(result) 运行结果 > python check.py > type result.txt CODE-67890.\\newdir/dir2/data2.xls 参考 Openpyxl Doc xlrd- API Reference

python修改excel文档相关日期信息

工作需要修改excel文件创建内容时间(excel属性非文件属性)和上次修改时间, 用python写了一个脚本处理了一下。环境： windows10 python3.7 + openpyxl 1. 使用openpyxl修改excel属性 import openpyxl from datetime import datetime fh = openpyxl.load_workbook("results.xlsx") obj = fh.properties #To get old properties print( obj ) # print old properties fh.properties.created = datetime(2000,1,1, 8,30,11) fh.properties.modified = datetime(2000,2,1, 8,32,19) ##similarly you can set other fields ## new_obj = fh.properties #Now get new properties print ( new_obj ) # print new properties fh.save("results2.xlsx") 2.使用os.utime方法修改文件属性的访问时间,修改时间 import os, sys,time from datetime import datetime # Showing stat information of file stinfo = os.stat('results.xlsx') print (stinfo) # Using os.stat to recieve atime and mtime of file print ("access time of results.xlsx: %s" %stinfo.st_atime) print ("modified time of results.xlsx: %s" %stinfo.st_mtime) # Modifying atime and mtime t = datetime(2008, 1, 1, 12,12, 12) atime = time.mktime(t.timetuple()) t = datetime(2009, 1, 1, 12,12, 12) mtime = time.mktime(t.timetuple()) os.utime("results.xlsx",(atime, mtime)) print ("done!!") 3.使用pywin32-SetFileTime方法修改文件属性的创建日期, 访问时间,修改时间 from win32file import CreateFile, SetFileTime, GetFileTime, CloseHandle from win32file import GENERIC_READ, GENERIC_WRITE, OPEN_EXISTING from pywintypes import Time import time from datetime import datetime def modifyFileTime(filePath, createTime, modifyTime, accessTime): try: fh = CreateFile(filePath, GENERIC_READ | GENERIC_WRITE, 0, None, OPEN_EXISTING, 0, 0) createTimes = Time(createTime) accessTimes = Time(modifyTime) modifyTimes = Time(accessTime) print (createTimes, accessTimes, modifyTimes) SetFileTime(fh, createTimes, accessTimes, modifyTimes) CloseHandle(fh) return 0 except: return 1 if __name__ == '__main__': t = datetime (2019, 12,13,21,51,2) cTime = time.mktime(t.timetuple()) t = datetime (2019, 2,2,0,1,3) mTime = time.mktime(t.timetuple()) t = datetime (2019, 2,2,0,1,4) aTime = time.mktime(t.timetuple()) fName = 
r"results.xlsx" r = modifyFileTime(fName, cTime, mTime, aTime) if r == 0: print('修改完成') elif r == 1: print('修改失败') 参考及引用 Openpyxl Doc python 修改文件的创建时间、修改时间、访问时间 pythondoc-os.utime In Python, how do you convert a datetime object to seconds? python ImportError: No module named win32file

python根据excel内容生成文件夹

python根据excel内容生成文件夹工作需要根据excel数据信息，生成对应的文件夹，用python写了一个脚本处理了一下。样例中表格内容也进行了调整。环境： windows10 python3.7 + openpyxl 目标：以list.xlsx数据为准，抽取指定sheet页中的指定字段信息，生成目录, 并在目录中创建固定的子目录. 表格格式: 表格sheet-A01 序号项目名称备注 1 项目1 张三 2 项目2 李四表格sheet-A02 序号项目名称备注 1 项目1 王五每一个sheet存放一个批次的任务信息, total为批次名称代码: from openpyxl import load_workbook from openpyxl import Workbook import os import re ## 获取项目清单列表 wb = load_workbook( filename = '.\\test\\list.xlsx') #ws_in = wb_in.active outpath=".\\out\\" ## 名字列表开始结束行号 start_index = 4 ##end_index = ## 目录编号 dir_no = 1 ## 1. 找到指定的sheet页面 sheets_name = ['A01', 'A02'] ## 3. 组成符合要求的目录字符串创建目录 def mkdir(path): try: os.makedirs(path) print("CREATE") except FileExistsError: print("EXIST, NO CREATE") # Get all dirs def mksubdir(path): try: dirnames = [name for name in os.listdir('.\\model') if os.path.isdir(os.path.join('.\\model', name))] for i in dirnames: mkdir(path+"\\"+i) except : print(" CREATE SUB DIR ERROR ") for i in range(len(sheets_name)): print(i) sheet = wb[sheets_name[i]] print(sheet) ## 2. 找到相关的任务信息 for row in range(start_index, sheet.max_row+1): ## 1. 判断内容是否空格 0 or 1? if (str(sheet.cell(row, column=1).value).strip() == "None" ): print(batch_no + " is over") break; ##2. 获取表格内容生成目录 task_no = sheet.cell(row, column=1).value batch_no = sheet.title.split('-')[0] task_context = str(sheet.cell(row, column=2).value).strip() task_manager = re.search(r'张三|李四|王五', sheet.cell(row,column=3).value) task_leader = task_manager.group(0) #(序号)-批次号-批次序号-任务名称(负责人) # mkdir(path) dir_no_str = format(dir_no, '02d') task_no_str = format(task_no, '02d') dirname=dir_no_str+"-"+batch_no+"-"+task_no_str+"-"+task_context+"-"+task_leader mkdir(outpath+dirname) # mksubdir(path) mksubdir(outpath+dirname) dir_no = dir_no+1 运行结果 ├─01-A01-01-项目1-张三 │ ├─01_文档目录1 │ └─02_文档目录2 ├─02-A01-02-项目2-李四 │ ├─01_文档目录1 │ └─02_文档目录2 └─03-A02-01-项目3-王五 ├─01_文档目录1 └─02_文档目录2 ...

python汇总excel数据

工作需要汇总整理相关excel数据信息，并按照规定格式进行反馈，用python写了一个脚本处理了一下。样例中表格内容也进行了调整。环境： windows10 python3.7 + openpyxl 目标：以data1数据为准，通过查找data2补全相关信息, 将数据填入要求的result文件中。表格格式: 表格1 data1.xlsx 姓名张三李四表格2 data2.xlsx 序号姓名性别身份证号联系电话 01 张三男 130012345678901234 13911111111 02 李四男 123012345678901234 13922222222 03 王五男 123012345678901234 13933333333 表格3 result.xlsx 序号所属部门厂商姓名身份证号联系电话备注代码: from openpyxl import load_workbook from openpyxl import Workbook import pandas as pd wb_in = load_workbook( filename = '.\\test\\data1.xlsx') ws_in = wb_in.active ## 名字列表开始结束行号 start_index = 2 end_index = 4 wb_info = load_workbook( filename = '.\\test\\data2.xlsx') ws_info = wb_info.active wb_out = load_workbook( filename = '.\\test\\result.xlsx') ws_out = wb_out.active out_index = 2 ## 找到要统计人员姓名 for x in range(start_index,end_index): name = ws_in.cell(row=x,column=1) print(name.value) ## 查找要统计人员附件信息并更新到统计表格中 find_flag = 0 for row in ws_info.iter_rows("B"): for col in row: #print(col.value) if (str(col.value).strip() == str(name.value).strip()) and (find_flag == 0): ## 第四列身份证第五列联系电话 idno = ws_info.cell(row=col.row,column=4) phoneno = ws_info.cell(row=col.row, column=5) ## 更新到统计文件 ws_out['A'+ str(out_index)].value = str(out_index-1) ws_out['B'+ str(out_index)].value = 'XX部' ws_out['C'+ str(out_index)].value = 'XXX' ws_out['D'+ str(out_index)].value = name.value ws_out['E'+ str(out_index)].value = idno.value ws_out['F'+ str(out_index)].value = phoneno.value out_index = out_index + 1 find_flag = 1 break if find_flag == 0: print(name.value) wb_out.save( filename = '.\\test\\result.xlsx') 运行结果序号所属部门厂商姓名身份证号联系电话备注 1 XX部 XXX 张三 130012345678901234 13911111111 2 XX部 XXX 李四 130012345678901234 13922222222 参考 Openpyxl Doc Using openpyxl to find rows that contain cell with specific value (Python 3.6)

python 解压指定路径下zip文件

概述：项目变更版本需要源码包，检查关键代码是否缺失，其中有一个步骤需要进入指定目录解压所有.ZIP压缩包问题： 1. 并行处理； 2. zipfile extract后中文名称乱码；解决：并行处理使用，参考之前的扫描端口的套路修改； zipfile 解压乱码，踏着前人的足迹，查看了下python zipfile 源码open 函数中如果zinfo.flag_bits 不是utf-8 都默认设为cp437 def open(self, name, mode="r", pwd=None, *, force_zip64=False): ... if zinfo.flag_bits & 0x800: # UTF-8 filename fname_str = fname.decode("utf-8") else: fname_str = fname.decode("cp437") ... ZIP File Format Specification 中描述 APPENDIX D - Language Encoding (EFS) ------------------------------------ D.1 The ZIP format has historically supported only the original IBM PC character encoding set, commonly referred to as IBM Code Page 437. This limits storing file name characters to only those within the original MS-DOS range of values and does not properly support file names in other character encodings, or languages. To address this limitation, this specification will support the following change. 代码： import os import zipfile from queue import Queue import time import threading from pathlib import Path print_lock = threading.Lock() def unzip(file): file_name, ext = os.path.splitext(file) if ext == ".zip": try: f = zipfile.ZipFile(file, 'r') for fn in f.namelist(): extract_path = Path(f.extract(fn)) extract_path.replace(fn.encode('cp437').decode('gbk')) with print_lock: print(file, 'unzip ok') except: with print_lock: print(file, 'unzip error') pass def threader(): while True: worker = q.get() unzip(worker) q.task_done() def create_thread( threadnums ): for x in range(threadnums): t = threading.Thread(target=threader) t.daemon = True t.start() if __name__ == "__main__": q = Queue() startTime = time.time() path = os.getcwd()+'\\源码' os.chdir(path) file_list = os.listdir(path) print(file_list) create_thread(100) for unzipfile in file_list: q.put(unzipfile) q.join() print('Time taken:', time.time()-startTime) 参考： python zipfile extract 解压中文文件名 ...

Netflix Blog- Netflix Python技术的使用

这篇文章《Python at Netflix》出自 Netflix Technology Blog，本文主要介绍 Python 在 Netflix 各个组内的应用。 Open Connect: Netflix搭建的全球CDN网络，python有广泛应用。 Demand Engineering：数值分析类：numpy， scipy； AWS SDK：boto3； Web 框架： Flask； 队列： RQ（Redis queue）； 轻量级IDE： bpython； 编辑工具： Jupyter Notebooks， nteract； Data Explorer： visualization tools； Data FrameWork：Semiotic。 CORE: 统计学数值分析类：numpy， scipy， ruptures， pandas。 Monitoring, alerting and auto-remediation: Spectator：metrics 的客户端；客户端： Spectator Python client； Winston and Bolt： Web 框架： Flask； WSGI http server： Gunicorn； Flask Rest APIs 插件： Flask-RESTPlus。 Information Security：安全监控： Security Monkey； SSH资源保护： Bless； TLS证书生成：Lemur； AWS安全部署（权限最小化）： Aardvark and Repokid； 数据取证： Diffy， Prism， Lanius。 Personalization Algorithms：深度神经网络学习： TensorFlow， Keras， PyTorch； Gradient Boosted Decision Trees： XGBoost， LightGBM； 其他相关科学运算类：numpy， scipy， pandas， sklearn， matplotlib， cvxpy。 Machine Learning Infrastructure： ML Libraries: R Project， TensorFlow， XGBoost ...