小工具1.0
This commit is contained in:
parent
5e30cdea23
commit
c498837e4b
|
@ -0,0 +1,8 @@
|
||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
|
@ -0,0 +1,250 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Requirement already satisfied: html2text in d:\\conda\\envs\\xl\\lib\\site-packages (2024.2.26)\n",
|
||||||
|
"Requirement already satisfied: lxml in d:\\conda\\envs\\xl\\lib\\site-packages (5.1.0)\n",
|
||||||
|
"Collecting pyperclip\n",
|
||||||
|
" Downloading pyperclip-1.8.2.tar.gz (20 kB)\n",
|
||||||
|
" Preparing metadata (setup.py): started\n",
|
||||||
|
" Preparing metadata (setup.py): finished with status 'done'\n",
|
||||||
|
"Building wheels for collected packages: pyperclip\n",
|
||||||
|
" Building wheel for pyperclip (setup.py): started\n",
|
||||||
|
" Building wheel for pyperclip (setup.py): finished with status 'done'\n",
|
||||||
|
" Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11136 sha256=6e4bca73fa5bfe452a9bf543697a8a05c04392e67b7d3e8fa75ec9b4abba6b75\n",
|
||||||
|
" Stored in directory: c:\\users\\25086\\appdata\\local\\pip\\cache\\wheels\\70\\bd\\ba\\8ae5c080c895c9360fe6e153acda2dee82527374467eae061b\n",
|
||||||
|
"Successfully built pyperclip\n",
|
||||||
|
"Installing collected packages: pyperclip\n",
|
||||||
|
"Successfully installed pyperclip-1.8.2\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"# Install the third-party packages this notebook imports.\n",
"# %pip (unlike !pip) always installs into the environment of the running kernel.\n",
"%pip install requests html2text lxml pyperclip Pillow"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T05:07:12.519422Z",
|
||||||
|
"start_time": "2024-03-23T05:07:08.129646Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "6d25e442e9ebdea2",
|
||||||
|
"execution_count": 50
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 40,
|
||||||
|
"id": "initial_id",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:41:47.923423Z",
|
||||||
|
"start_time": "2024-03-23T04:41:47.920333Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import requests\n",
|
||||||
|
"from html2text import HTML2Text\n",
|
||||||
|
"from lxml import etree\n",
|
||||||
|
"from html import unescape\n",
|
||||||
|
"import os"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"url = 'https://blog.csdn.net/ysblogs/article/details/88530124'"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:41:48.700814Z",
|
||||||
|
"start_time": "2024-03-23T04:41:48.697800Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "1a9f95e42361f50f",
|
||||||
|
"execution_count": 41
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Request headers sent with the article download.\n",
"# The session cookie is read from the CSDN_COOKIE environment variable so that\n",
"# login tokens are never stored in the notebook or committed to version control.\n",
"headers = {\n",
"    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',\n",
"}\n",
"csdn_cookie = os.environ.get('CSDN_COOKIE')\n",
"if csdn_cookie:\n",
"    # Only send a Cookie header when one was explicitly provided.\n",
"    headers['Cookie'] = csdn_cookie\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:41:48.958821Z",
|
||||||
|
"start_time": "2024-03-23T04:41:48.955804Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "34ae80cfe01527bc",
|
||||||
|
"execution_count": 42
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Download the article page.\n",
"# The timeout keeps the cell from hanging forever on a stalled connection, and\n",
"# raise_for_status() stops here on HTTP errors instead of parsing an error page.\n",
"r = requests.get(url, headers=headers, timeout=10)\n",
"r.raise_for_status()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:41:49.970131Z",
|
||||||
|
"start_time": "2024-03-23T04:41:49.450823Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "1266447180328281",
|
||||||
|
"execution_count": 43
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"look for text...\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"html = r.content.decode(\"utf8\")\n",
"tree = etree.HTML(html)\n",
"print(\"look for text...\")\n",
"\n",
"# Locate the article title and the article body element.\n",
"title_nodes = tree.xpath('//*[@id=\"articleContentId\"]/text()')\n",
"block = tree.xpath('//*[@id=\"content_views\"]')\n",
"if not title_nodes or not block:\n",
"    # Fail with a clear message instead of an IndexError when the expected\n",
"    # elements are missing (e.g. an error or verification page was returned).\n",
"    raise ValueError(f\"article title or body not found in {url}\")\n",
"title = title_nodes[0]\n",
"\n",
"# Article body as HTML, with character references decoded\n",
"ohtml = unescape(etree.tostring(block[0]).decode(\"utf8\"))\n",
"# Article body as plain text\n",
"text = block[0].xpath('string(.)').strip()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:41:55.502121Z",
|
||||||
|
"start_time": "2024-03-23T04:41:55.492083Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "6577f8eea4b0bb70",
|
||||||
|
"execution_count": 45
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"write markdown...\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"import re\n",
"\n",
"# Path separators, line breaks and the characters : * ? \" < > | are not allowed\n",
"# in Windows file names, so replace them before using the title as a file name.\n",
"safe_title = re.sub(r'[\\\\/:*?\"<>|\\r\\n]+', '_', title).strip() or 'article'\n",
"\n",
"print(\"write markdown...\")\n",
"# Convert only the extracted article block (ohtml), not the whole page, and do\n",
"# the conversion before opening the file so a failure leaves no empty file behind.\n",
"text_maker = HTML2Text()\n",
"md_text = text_maker.handle(ohtml)\n",
"\n",
"with open(f\"{safe_title}.md\", 'w', encoding='utf8') as md_file:\n",
"    md_file.write(md_text)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T04:43:05.647601Z",
|
||||||
|
"start_time": "2024-03-23T04:43:05.602362Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "bafba43dece27d97",
|
||||||
|
"execution_count": 49
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"图像已转换为Base64并复制到剪贴板。\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"import base64\n",
"import io\n",
"\n",
"import pyperclip\n",
"from PIL import Image, ImageGrab\n",
"\n",
"# Read whatever is currently on the clipboard.\n",
"image = ImageGrab.grabclipboard()\n",
"\n",
"# grabclipboard() can also return a list of file names, so check for a real image.\n",
"if isinstance(image, Image.Image):\n",
"    # Encode as a PNG file in memory. Raw pixel bytes from tobytes() carry no\n",
"    # header (size/mode/format), so their Base64 form cannot be decoded as an image.\n",
"    png_buffer = io.BytesIO()\n",
"    image.convert(\"RGB\").save(png_buffer, format=\"PNG\")\n",
"    base64_image = base64.b64encode(png_buffer.getvalue()).decode(\"utf-8\")\n",
"\n",
"    # Put the Base64 text back on the clipboard.\n",
"    pyperclip.copy(base64_image)\n",
"    print(\"图像已转换为Base64并复制到剪贴板。\")\n",
"else:\n",
"    print(\"剪贴板中没有图像。\")"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-03-23T05:10:21.387867Z",
|
||||||
|
"start_time": "2024-03-23T05:10:21.356022Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "df52d0199e805945",
|
||||||
|
"execution_count": 58
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"outputs": [],
|
||||||
|
"source": [],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"id": "50799972739dbff4"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 2
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython2",
|
||||||
|
"version": "2.7.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Reference in New Issue