Skip to content

🐍 Python脚本集合

💡 Python脚本让复杂的数据处理和自动化任务变得简单优雅


📊 数据处理脚本

📈 Excel数据分析

功能: 批量处理Excel文件,进行数据清洗、统计分析和报表生成

python
import pandas as pd
import os
from datetime import datetime

def analyze_excel_data(file_path):
    """Clean an Excel sheet, summarise it and write the cleaned data to a report.

    Rows containing any missing value and exact duplicate rows are dropped
    before the summary is computed. The cleaned frame is then written to
    ``report_<YYYYMMDD>.xlsx`` (today's date) in the current working
    directory, replacing any report already written the same day.

    Args:
        file_path: Path of the workbook handed to ``pandas.read_excel``.

    Returns:
        A dict with three keys: ``total_rows`` (row count after cleaning),
        ``columns`` (list of column labels) and ``numeric_summary`` (the
        ``DataFrame.describe()`` result, or an empty frame when the sheet
        has no columns).
    """
    df = pd.read_excel(file_path)

    # Data cleaning: drop incomplete rows first, then exact duplicates.
    df = df.dropna().drop_duplicates()

    # describe() raises ValueError on a frame without columns (e.g. a blank
    # sheet), so fall back to an empty summary in that case.
    numeric_summary = df.describe() if len(df.columns) else pd.DataFrame()

    summary = {
        'total_rows': len(df),
        'columns': list(df.columns),
        'numeric_summary': numeric_summary
    }

    # Write the cleaned data next to the caller, stamped with today's date.
    report_name = f"report_{datetime.now().strftime('%Y%m%d')}.xlsx"
    df.to_excel(report_name, index=False)

    return summary

特点: 📅 2024-03-20 | ⚡ 数据分析 | 📊 报表生成


🗂️ 文件批量重命名

功能: 支持正则表达式的批量文件重命名工具,支持多种命名规则

python
import os
import re
from pathlib import Path

def batch_rename(directory, pattern, replacement, preview=True):
    """Rename the files directly inside ``directory`` via a regex substitution.

    Every regular file whose name changes under
    ``re.sub(pattern, replacement, name)`` is a candidate. Sub-directories
    are ignored and the scan is not recursive.

    Args:
        directory: Folder whose immediate files are considered.
        pattern: Regular expression (string or compiled) applied to each
            file name.
        replacement: Replacement string or callable, as accepted by ``re.sub``.
        preview: When true (the default) nothing is renamed; the planned
            changes are only printed. When false the renames are performed.

    Returns:
        A list of ``(old_name, new_name)`` tuples in sorted file order. In
        preview mode these are the planned renames; otherwise they are the
        renames that were actually carried out.
    """
    regex = re.compile(pattern)
    changes = []

    # Snapshot and sort the listing up front: the directory is modified while
    # we walk it, and a sorted order makes the result reproducible.
    for file in sorted(Path(directory).glob('*')):
        if not file.is_file():
            continue

        old_name = file.name
        new_name = regex.sub(replacement, old_name)
        if new_name == old_name:
            continue

        if not preview:
            new_path = file.parent / new_name
            # Path.rename silently replaces an existing file on POSIX, which
            # would destroy data. samefile() lets case-only renames through
            # on case-insensitive file systems.
            if new_path.exists() and not new_path.samefile(file):
                print(f"跳过 {old_name}: 目标已存在 {new_name}")
                continue
            file.rename(new_path)

        changes.append((old_name, new_name))

    if preview:
        print("预览重命名操作:")
        for old, new in changes:
            print(f"{old} -> {new}")
    else:
        print(f"成功重命名 {len(changes)} 个文件")

    return changes

特点: 📅 2024-03-15 | ⚡ 批量操作 | 🔄 正则支持


🌐 网络爬虫脚本

🕷️ 网页数据采集

功能: 多线程网页爬虫,支持动态页面和反反爬机制

python
import requests
from bs4 import BeautifulSoup
import json
import time
from concurrent.futures import ThreadPoolExecutor

class WebScraper:
    """Download pages through one shared session and pull out basic content."""

    def __init__(self, max_workers=5):
        """Open the HTTP session and remember the thread-pool size.

        Args:
            max_workers: Number of worker threads used by ``scrape_multiple``.
        """
        self.session = requests.Session()
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.session.headers.update(browser_headers)
        self.max_workers = max_workers

    def scrape_page(self, url):
        """Fetch one URL and return its title, link targets and visible text.

        Args:
            url: Address of the page to download (10 second timeout).

        Returns:
            A dict with keys ``title``, ``links`` and ``text``, or ``None``
            when anything goes wrong. The error is printed, not raised.
        """
        try:
            reply = self.session.get(url, timeout=10)
            reply.raise_for_status()

            document = BeautifulSoup(reply.text, 'html.parser')

            # Look the <title> tag up once; pages without one get ''.
            title_tag = document.find('title')
            hrefs = []
            for anchor in document.find_all('a', href=True):
                hrefs.append(anchor.get('href'))

            return {
                'title': title_tag.text if title_tag else '',
                'links': hrefs,
                'text': document.get_text(strip=True),
            }
        except Exception as exc:
            print(f"错误: {url} - {exc}")
            return None

    def scrape_multiple(self, urls):
        """Scrape several URLs on a thread pool, dropping the failed ones.

        Args:
            urls: Iterable of page addresses.

        Returns:
            The successful ``scrape_page`` results, in the order of ``urls``.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            outcomes = list(pool.map(self.scrape_page, urls))

        return list(filter(lambda page: page is not None, outcomes))

特点: 📅 2024-03-18 | ⚡ 多线程 | 🔍 数据采集


📧 自动化办公脚本

✉️ 邮件批量发送

功能: 支持HTML邮件、附件发送的批量邮件工具

python
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import os

class EmailSender:
    """Send plain-text or HTML e-mails, optionally with file attachments."""

    def __init__(self, smtp_server, port, username, password, timeout=30):
        """Store the connection settings; no connection is opened here.

        Args:
            smtp_server: Host name of the SMTP server.
            port: Port of the SMTP server.
            username: Login name, also used as the ``From`` address.
            password: Login password.
            timeout: Socket timeout in seconds for the SMTP connection, so
                an unresponsive server cannot block the caller forever.
        """
        self.smtp_server = smtp_server
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout

    def send_email(self, to_list, subject, body, attachments=None, is_html=True):
        """Build one message and send it to every recipient.

        Args:
            to_list: Recipient addresses. A single address string is also
                accepted and treated as a one-element list.
            subject: Subject line.
            body: Message body (UTF-8).
            attachments: Optional iterable of file paths. Paths that are not
                existing files are skipped with a printed notice.
            is_html: Send ``body`` as ``text/html`` when true, else
                ``text/plain``.

        Returns:
            ``True`` when the message was handed to the server, ``False`` when
            connecting, logging in or sending failed (the error is printed).
        """
        # A bare string would otherwise be joined character by character.
        if isinstance(to_list, str):
            to_list = [to_list]

        msg = MIMEMultipart()
        msg['From'] = self.username
        msg['To'] = ', '.join(to_list)
        msg['Subject'] = subject

        # Message body
        msg.attach(MIMEText(body, 'html' if is_html else 'plain', 'utf-8'))

        # Attachments
        for file_path in attachments or ():
            if not os.path.isfile(file_path):
                print(f"附件不存在, 已跳过: {file_path}")
                continue

            with open(file_path, 'rb') as attachment:
                part = MIMEBase('application', 'octet-stream')
                part.set_payload(attachment.read())

            encoders.encode_base64(part)
            # Let the email package quote/encode the file name; a hand-built
            # header breaks on names with spaces or non-ASCII characters.
            part.add_header(
                'Content-Disposition',
                'attachment',
                filename=os.path.basename(file_path)
            )
            msg.attach(part)

        # Send. The context manager closes the connection even when
        # starttls/login/sendmail raises. The broad except is kept on purpose:
        # callers rely on the boolean result rather than on exceptions.
        try:
            with smtplib.SMTP(self.smtp_server, self.port, timeout=self.timeout) as server:
                server.starttls()
                server.login(self.username, self.password)
                server.sendmail(self.username, to_list, msg.as_string())
            return True
        except Exception as e:
            print(f"发送失败: {e}")
            return False

特点: 📅 2024-03-12 | ⚡ 办公自动化 | 📎 附件支持


📝 PDF文档处理

功能: PDF合并、分割、水印添加等批量处理功能

python
from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import os

class PDFProcessor:
    """Batch helpers for merging, splitting and watermarking PDF files."""

    @staticmethod
    def merge_pdfs(pdf_list, output_path):
        """Concatenate several PDFs into one file.

        Args:
            pdf_list: Paths of the source PDFs, in the desired page order.
            output_path: Path of the merged PDF to write.
        """
        writer = PdfWriter()

        for pdf_path in pdf_list:
            reader = PdfReader(pdf_path)
            for page in reader.pages:
                writer.add_page(page)

        with open(output_path, 'wb') as output_file:
            writer.write(output_file)

        print(f"合并完成: {output_path}")

    @staticmethod
    def split_pdf(pdf_path, output_dir):
        """Write every page of a PDF to its own ``page_<n>.pdf`` file.

        Args:
            pdf_path: Path of the PDF to split.
            output_dir: Folder receiving the single-page files; created
                (including parents) when missing.
        """
        reader = PdfReader(pdf_path)

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        # Page files are numbered from 1.
        for page_number, page in enumerate(reader.pages, start=1):
            writer = PdfWriter()
            writer.add_page(page)

            output_path = os.path.join(output_dir, f"page_{page_number}.pdf")
            with open(output_path, 'wb') as output_file:
                writer.write(output_file)

        print(f"分割完成,共 {len(reader.pages)} 页")

    @staticmethod
    def add_watermark(pdf_path, watermark_text, output_path):
        """Stamp a semi-transparent text watermark onto every page.

        Args:
            pdf_path: Path of the PDF to watermark.
            watermark_text: Text drawn at position (100, 100) of a
                letter-sized overlay page.
            output_path: Path of the watermarked PDF to write.
        """
        # Local import: tempfile is needed only here.
        import tempfile

        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        # Build the overlay in a private temporary directory. A fixed
        # "watermark.pdf" in the working directory could overwrite a user
        # file and would be left behind if a later step raised.
        with tempfile.TemporaryDirectory() as scratch_dir:
            watermark_pdf = os.path.join(scratch_dir, "watermark.pdf")
            c = canvas.Canvas(watermark_pdf, pagesize=letter)
            c.setFillAlpha(0.3)
            c.drawString(100, 100, watermark_text)
            c.save()

            watermark_reader = PdfReader(watermark_pdf)
            watermark_page = watermark_reader.pages[0]

            for page in reader.pages:
                page.merge_page(watermark_page)
                writer.add_page(page)

            with open(output_path, 'wb') as output_file:
                writer.write(output_file)

        print(f"水印添加完成: {output_path}")

特点: 📅 2024-03-10 | ⚡ 文档处理 | 🔧 多功能


🖼️ 图像处理脚本

🎨 图片批量处理

功能: 批量图片压缩、格式转换、尺寸调整

python
from PIL import Image
import os
from pathlib import Path

class ImageProcessor:
    """Batch helpers for resizing and converting the images in a folder."""

    @staticmethod
    def resize_images(input_dir, output_dir, size=(800, 600), quality=85):
        """Shrink every supported image so it fits inside ``size``.

        Files keep their names and are written to ``output_dir``. A file
        that fails to process is reported and skipped, so one bad image does
        not stop the batch.

        Args:
            input_dir: Folder scanned (non-recursively) for images.
            output_dir: Destination folder; created with parents if missing.
            size: ``(max_width, max_height)`` bounding box.
            quality: Quality setting passed to ``Image.save``.
        """
        input_path = Path(input_dir)
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        supported_formats = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

        for image_file in input_path.iterdir():
            if not image_file.is_file():
                continue
            if image_file.suffix.lower() not in supported_formats:
                continue

            try:
                with Image.open(image_file) as img:
                    # thumbnail() resizes in place and keeps the aspect ratio.
                    img.thumbnail(size, Image.Resampling.LANCZOS)

                    output_file = output_path / image_file.name
                    img.save(output_file, optimize=True, quality=quality)

                    print(f"处理完成: {image_file.name}")

            except Exception as e:
                print(f"处理失败 {image_file.name}: {e}")

    @staticmethod
    def convert_format(input_dir, output_dir, target_format='JPEG'):
        """Convert PNG/BMP/GIF/TIFF files in a folder to ``target_format``.

        Output files keep the source stem and get the extension matching the
        target format (``.jpg`` for JPEG, otherwise the lower-cased format
        name). A file that fails to convert is reported and skipped.

        Args:
            input_dir: Folder scanned (non-recursively) for images.
            output_dir: Destination folder; created with parents if missing.
            target_format: Pillow format name, e.g. ``'JPEG'`` or ``'PNG'``.
        """
        input_path = Path(input_dir)
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        format_name = target_format.upper()
        # The extension must follow the requested format; always writing
        # ".jpg" mislabels every non-JPEG conversion.
        extension = '.jpg' if format_name == 'JPEG' else f".{format_name.lower()}"

        for image_file in input_path.iterdir():
            if not image_file.is_file():
                continue
            if image_file.suffix.lower() not in ('.png', '.bmp', '.gif', '.tiff'):
                continue

            try:
                with Image.open(image_file) as img:
                    # JPEG has no alpha channel and cannot store palette
                    # images, so normalise those modes first.
                    if format_name == 'JPEG' and img.mode not in ('RGB', 'L', 'CMYK'):
                        has_alpha = (
                            img.mode in ('RGBA', 'LA')
                            or (img.mode == 'P' and 'transparency' in img.info)
                        )
                        if has_alpha:
                            # Flatten transparency onto a white background.
                            rgba = img.convert('RGBA')
                            flattened = Image.new('RGB', rgba.size, (255, 255, 255))
                            flattened.paste(rgba, mask=rgba.split()[-1])
                            img = flattened
                        else:
                            img = img.convert('RGB')

                    output_file = output_path / f"{image_file.stem}{extension}"
                    img.save(output_file, target_format)

                    print(f"转换完成: {image_file.name} -> {output_file.name}")

            except Exception as e:
                print(f"转换失败 {image_file.name}: {e}")

特点: 📅 2024-03-08 | ⚡ 图像处理 | 🔄 格式转换


📈 Python脚本统计

指标 | 数值
脚本总数 | 25+
分类数量 | 6个
更新频率 | 双周更新
应用领域 | 数据处理、自动化

💡 Python编程最佳实践

🐍 代码规范

  • 遵循PEP 8编码规范
  • 使用类型提示提高代码可读性
  • 编写详细的文档字符串
  • 合理使用异常处理

⚡ 性能优化

  • 使用列表推导式代替循环
  • 选择合适的数据结构
  • 利用内置函数和库
  • 考虑使用多线程/多进程

🔧 开发工具

  • 使用虚拟环境管理依赖
  • 配置代码格式化工具
  • 编写单元测试
  • 使用调试器进行问题排查

🎯 Python的魅力在于用简洁优雅的代码解决复杂的问题