{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-overview",
   "metadata": {},
   "source": [
    "# CSDN article to Markdown\n",
    "\n",
    "Downloads one CSDN blog post, extracts the article body, and saves it as a Markdown file named after the post title. The last section is an independent helper that converts an image on the clipboard into a Base64 string.\n",
    "\n",
    "Run the notebook top to bottom on a fresh kernel. If the site requires a logged-in session, export the browser cookie as the `CSDN_COOKIE` environment variable before starting Jupyter; a session cookie is a credential and must never be pasted into the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d25e442e9ebdea2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# html2text / lxml / pyperclip are pinned to the versions this notebook was last run with.\n",
    "# %pip (not !pip) installs into the environment of the running kernel.\n",
    "%pip install -q requests Pillow html2text==2024.2.26 lxml==5.1.0 pyperclip==1.8.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import io\n",
    "import os\n",
    "import re\n",
    "from html import unescape\n",
    "\n",
    "import pyperclip\n",
    "import requests\n",
    "from html2text import HTML2Text\n",
    "from lxml import etree\n",
    "from PIL import Image, ImageGrab"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-config",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a9f95e42361f50f",
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://blog.csdn.net/ysblogs/article/details/88530124'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34ae80cfe01527bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "headers = {\n",
    "    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',\n",
    "}\n",
    "\n",
    "# The session cookie carries a login token, so it is read from the environment\n",
    "# instead of being stored in the notebook. It is optional: without it the request\n",
    "# is sent anonymously.\n",
    "cookie = os.environ.get('CSDN_COOKIE')\n",
    "if cookie:\n",
    "    headers['Cookie'] = cookie"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-download",
   "metadata": {},
   "source": [
    "## Download and parse the article"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1266447180328281",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A timeout keeps a stalled connection from hanging the kernel;\n",
    "# raise_for_status() stops here on 4xx/5xx instead of parsing an error page.\n",
    "response = requests.get(url, headers=headers, timeout=30)\n",
    "response.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6577f8eea4b0bb70",
   "metadata": {},
   "outputs": [],
   "source": [
    "html = response.content.decode(\"utf8\")\n",
    "tree = etree.HTML(html)\n",
    "print(\"look for text...\")\n",
    "\n",
    "# Locate the article title and the element that wraps the article body.\n",
    "title_nodes = tree.xpath('//*[@id=\"articleContentId\"]/text()')\n",
    "block = tree.xpath('//*[@id=\"content_views\"]')\n",
    "if not title_nodes or not block:\n",
    "    raise RuntimeError(\n",
    "        \"Article title or body not found: the page layout may have changed \"\n",
    "        \"or the request was blocked.\"\n",
    "    )\n",
    "\n",
    "title = title_nodes[0].strip()\n",
    "\n",
    "# Article body as an HTML fragment. encoding=\"unicode\" returns a str with non-ASCII\n",
    "# characters intact, so no unescape() pass is needed (unescaping would also turn\n",
    "# &lt; / &amp; inside code samples back into live markup).\n",
    "article_html = etree.tostring(block[0], encoding=\"unicode\", method=\"html\", with_tail=False)\n",
    "\n",
    "# Article body as plain text.\n",
    "text = block[0].xpath('string(.)').strip()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-markdown",
   "metadata": {},
   "source": [
    "## Convert to Markdown and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "safe-filename-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "def safe_filename(name, fallback='article'):\n",
    "    \"\"\"Return `name` made safe for use as a file name.\n",
    "\n",
    "    Path separators, characters Windows forbids in file names and control\n",
    "    characters are replaced by '_'; surrounding spaces and dots are stripped.\n",
    "    `fallback` is returned when nothing usable is left.\n",
    "    \"\"\"\n",
    "    cleaned = re.sub(r'[\\\\/:*?\"<>|\\x00-\\x1f]+', '_', name).strip(' .')\n",
    "    return cleaned or fallback"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bafba43dece27d97",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"write markdown...\")\n",
    "text_maker = HTML2Text()\n",
    "# Convert only the extracted article, not the whole page (navigation, ads, comments).\n",
    "md_text = text_maker.handle(article_html)\n",
    "\n",
    "# Convert first, then open the file, so a failed conversion leaves no empty file behind.\n",
    "md_path = f\"{safe_filename(title)}.md\"\n",
    "with open(md_path, 'w', encoding='utf8') as md_file:\n",
    "    md_file.write(md_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-clipboard",
   "metadata": {},
   "source": [
    "## Clipboard image to Base64\n",
    "\n",
    "Independent of the sections above: takes the image currently on the clipboard and replaces it with the Base64 text of that image encoded as PNG."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df52d0199e805945",
   "metadata": {},
   "outputs": [],
   "source": [
    "# grabclipboard() returns an Image, a list of file names, or None,\n",
    "# so check the type rather than only testing for None.\n",
    "clipboard = ImageGrab.grabclipboard()\n",
    "\n",
    "if isinstance(clipboard, Image.Image):\n",
    "    # Encode as PNG before Base64: raw pixel bytes from tobytes() carry no size or\n",
    "    # format information and cannot be decoded back into an image.\n",
    "    buffered = io.BytesIO()\n",
    "    clipboard.convert(\"RGB\").save(buffered, format=\"PNG\")\n",
    "    base64_image = base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
    "\n",
    "    # Put the Base64 text on the clipboard.\n",
    "    pyperclip.copy(base64_image)\n",
    "    print(\"图像已转换为Base64并复制到剪贴板。\")\n",
    "else:\n",
    "    print(\"剪贴板中没有图像。\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}