| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742 |
- # coding: utf-8
- import base64
- import binascii
- import datetime
- import hashlib
- import ipaddress
- import logging
- import os
- import random
- import re
- import socket
- import string
- import subprocess
- import sys
- import urllib
- from collections import OrderedDict
- from urllib.parse import urlparse, urlunparse
- # 上面是标准库,下面是第三方库库
- import socks
- # python3.8 -m pip install PySocks
- from bs4 import BeautifulSoup
- def choose_from_list(items):
- """
- 返回一个选中的list。
- """
- while True:
- try:
- print("Available options:")
- for index, item in enumerate(items):
- print(f"{index} {item}")
- print(f"{len(items)} all")
- user_input = input("Choose your item(s) with index (comma-separated for multiple):\n")
- if user_input.lower() == 'all':
- return items
- selected_indices = [int(index.strip()) for index in user_input.split(',') if index.strip().isdigit()]
- if all(0 <= index < len(items) for index in selected_indices):
- return [items[index] for index in selected_indices]
- else:
- print("Invalid index number. Please try again.")
- except EOFError:
- sys.exit(0)
- except KeyboardInterrupt:
- sys.exit(0)
- except ValueError:
- print("Invalid input. Please enter valid index number(s).")
- def choose_from_iterable(iterable):
- """
- 返回一个选中的list。
- """
- ordered_iterable = iterable
- # 如果输入是字典,则转换为有序字典
- if isinstance(iterable, dict):
- ordered_iterable = OrderedDict(iterable)
- while True:
- try:
- print("Available options:")
- for index, item in enumerate(ordered_iterable):
- print(f"{index} {item}")
- print(f"{len(ordered_iterable)} all")
- user_input = input("Choose your item(s) with index (comma-separated for multiple):\n")
- if user_input.lower() == 'all':
- return list(ordered_iterable.values())
- selected_indices = [int(index.strip()) for index in user_input.split(',') if index.strip().isdigit()]
- if all(0 <= index < len(ordered_iterable) for index in selected_indices):
- selected_items = [list(ordered_iterable.values())[index] for index in selected_indices]
- return selected_items
- else:
- print("Invalid index number. Please try again.")
- except EOFError:
- sys.exit(0)
- except KeyboardInterrupt:
- sys.exit(0)
- except ValueError:
- print("Invalid input. Please enter valid index number(s).")
- def get_file_content(file_path):
- encodings = ["utf-8", "gbk"]
- for encoding in encodings:
- try:
- with open(file_path, "r", encoding=encoding) as fp:
- return fp.read()
- except:
- continue
- return None
- def get_full_path(path):
- if path.startswith("~"):
- path = os.path.expanduser(path)
- if path.startswith("./") or path.startswith(".\\"):
- path = os.path.abspath(path)
- return path
- def clean_list(lines, spliter=None, do_strip=True, remove_empty=True):
- """
- 如果有分割符,会对每行再进行分割
- 默认对每个元素进行strip
- 默认删除空字符串
- :param lines:
- :param spliter:
- :param do_strip:
- :param remove_empty:
- :return:
- """
- if isinstance(lines, list):
- if spliter:
- lines = [item for line in lines for item in line.split(spliter)]
- if do_strip:
- lines = [line.strip() for line in lines]
- if remove_empty and "" in lines:
- lines = [line for line in lines if line != ""]
- return lines
- return lines
- def get_lines_from_file(file_path, spliter=";", do_strip=True, remove_empty=True):
- """
- 从文件中读行,返回一个列表。
- 如果有分割符,会对每行再进行分割
- 默认对每个元素进行strip
- 默认删除空字符串
- :param file_path:
- :param spliter:
- :param do_strip:
- :param remove_empty:
- :return:
- """
- encodings_to_try = ['utf-8', 'gbk'] # 尝试的编码列表
- for encoding in encodings_to_try:
- try:
- with open(file_path, 'r', encoding=encoding) as f:
- lines = f.readlines()
- return clean_list(lines, spliter, do_strip, remove_empty)
- except UnicodeDecodeError:
- continue
- except FileNotFoundError:
- print(f"File not found: {file_path}")
- return None
- except Exception as e:
- print(f"An error occurred: {e}")
- return None
- return None # 如果都尝试失败,则返回 None
- def get_lines_from_quote(text, spliter=";", do_strip=True, remove_empty=True):
- if not text or not isinstance(text, str):
- return []
- lines = text.splitlines()
- return clean_list(lines, spliter, do_strip, remove_empty)
- def get_lines_from_console(spliter=";", do_strip=True, remove_empty=True):
- lines = []
- print("Enter multiple lines of text (Ctrl+D or Ctrl+Z to end):")
- while True:
- try:
- line = input()
- lines.append(line)
- except EOFError:
- break
- return clean_list(lines, spliter, do_strip, remove_empty)
- def is_valid_domain(domain):
- domain_pattern = "^((?!-)[A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$"
- return re.match(domain_pattern, domain) is not None
- def is_valid_host(host):
- if not host:
- return False
- try:
- return is_valid_domain(host) or is_valid_ip(host)
- except:
- return False
- def is_valid_ip(host):
- try:
- ipaddress.ip_address(host)
- return True
- except ValueError:
- return False
- def is_valid_subnet(subnet):
- '''
- strict =False,因为想要 192.168.1.1/27这个格式返回true
- :param subnet:
- :return:
- '''
- try:
- ipaddress.ip_network(subnet, strict=False)
- return True
- except ValueError:
- return False
- def is_valid_domain_by_query(host):
- try:
- socket.getaddrinfo(host, None)
- return True
- except socket.gaierror:
- return False
- def is_valid_port(port):
- try:
- p = int(port)
- if 0 <= p <= 65535:
- return True
- except:
- pass
- return False
- def get_ip_list_of_subnet(subnet):
- try:
- tmp = ipaddress.ip_network(subnet, strict=False)
- result = [item.__str__() for item in tmp.hosts()]
- return result
- except ValueError:
- return []
- def get_logger(log_file_name='logger.log'):
- """
- # https://stackoverflow.com/questions/7016056/python-logging-not-outputting-anything
- # 只是将handler的level改成debug是不够的,还需要设置logger本身的level。logger是上游,handler是下游
- :param log_file_name:
- :return:
- """
- formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
- # 使用basicConfig设置全局的日志级别
- logging.basicConfig(level=logging.DEBUG)
- # 创建logger
- logger = logging.getLogger('main')
- # 创建和设置StreamHandler和FileHandler
- handlers = [logging.StreamHandler(sys.stdout), logging.FileHandler(log_file_name, encoding="utf-8")]
- for handler in handlers:
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- return logger
- def gen_random_str(to_void):
- # 定义包含所有可能字符的字符集合
- while True:
- # characters = string.ascii_letters + string.digits + string.punctuation
- characters = string.ascii_letters + string.digits
- # 使用random.choices()函数从字符集合中随机选择字符,并生成随机字符串
- random_string = ''.join(random.choices(characters, k=5))
- if to_void and random_string in to_void:
- continue
- else:
- return random_string
- def highlight_print(content, tips=""):
- if not tips:
- tips = ""
- print()
- print(("#" * 10 + "{}" + "#" * 10).format(tips))
- print(content)
- print("#" * (20 + len(str(tips))))
- print()
- def set_socks_proxy(proxy_host, proxy_port):
- """
- 设置全局的 SOCKS 代理,适用于所有套接字操作。
- socks.set_default_proxy() # 取消代理设置
- """
- try:
- import socks
- import socket
- proxy_port = int(proxy_port)
- print("set proxy: {}:{}".format(proxy_host, proxy_port))
- socks.set_default_proxy(socks.SOCKS5, proxy_host, proxy_port)
- socket.socket = socks.socksocket
- return True
- except:
- print("set socks proxy failed!!!")
- return False
- def is_using_socks_proxy():
- """
- 检测当前程序是否使用了 SOCKS 代理。
- :return: 如果使用了 SOCKS 代理,返回 True;否则返回 False。
- """
- import socket
- return socket.socket == socks.socksocket
- def get_base_url(url):
- '''
- return 末尾不包含/
- 引用方法:
- from 包名处(模块名称).文件名称 import 函数名称
- 包或者模块,是指含有__init__.py的文件夹
- '''
- parsed_url = urlparse(url)
- base_url = urlunparse((parsed_url.scheme, parsed_url.netloc, '', '', '', ''))
- return base_url
- def url_encode(url):
- return urllib.parse.quote(url)
- def url_decode(url):
- return urllib.parse.unquote(url)
- def get_argv(num_of_arg=1):
- result = []
- if len(sys.argv) > num_of_arg:
- # return sys.argv[1:-1] #这是错误的,末尾index所在元素是不会被包含的
- return sys.argv[1:num_of_arg + 222] # 只要大于长度都可以!
- else:
- index = 0
- while index < num_of_arg:
- arg = input("Enter argument {}: ".format(index + 1))
- result.append(arg)
- index += 1
- return result
- def get_textarea_contents(html, name=None):
- # Parse the HTML with BeautifulSoup
- soup = BeautifulSoup(html, 'html.parser')
- if name:
- # Find all <textarea> tags with the specified name attribute and extract their content
- textarea_contents = [textarea.text.strip() for textarea in soup.select(f'textarea[name="{name}"]')]
- else:
- # Find all <textarea> tags and extract their content
- textarea_contents = [textarea.text.strip() for textarea in soup.find_all('textarea')]
- return textarea_contents
- def get_content_by_class(html, class_name):
- """
- 根据
- :param html:
- :param class_name:
- :return:
- """
- # 使用 BeautifulSoup 解析 HTML
- soup = BeautifulSoup(html, 'html.parser')
- # 使用 CSS 选择器定位元素
- elements = soup.select(f'.{class_name}')
- # 提取元素的内容
- content = [element.get_text() for element in elements]
- return content
- def get_content_by_element(html, element_name):
- # 使用 BeautifulSoup 解析 HTML
- soup = BeautifulSoup(html, 'html.parser')
- # 使用 CSS 选择器定位元素
- elements = soup.select(element_name)
- # 提取元素的内容
- content = [element.get_text() for element in elements]
- return content
- def get_full_path_for_file(path):
- if path.startswith("~"):
- path = os.path.expanduser(path)
- if path.startswith("./") or path.startswith(".\\"):
- path = os.path.abspath(path)
- return path
- def is_file_path_by_pattern(path):
- # 定义常见文件路径的正则表达式模式
- path_patterns = [
- r'^[a-zA-Z]:\\[^:*?"<>|\r\n]*$', # Windows 绝对路径
- r'^[a-zA-Z]:/[^:*?"<>|\r\n]*$', # Windows 绝对路径(斜杠)
- r'^/[^:*?"<>|\r\n]*$', # Linux/MacOS 绝对路径
- r'^\.[a-zA-Z0-9_/-]*$', # 相对路径
- r'^\.\.[a-zA-Z0-9_/-]*$' # 相对路径(上级目录)
- ]
- # 使用正则表达式匹配路径
- for pattern in path_patterns:
- if re.match(pattern, path):
- return True
- # 如果没有匹配任何模式,则判定为无效路径
- return False
- def get_ip(host):
- try:
- return socket.gethostbyname(host)
- except:
- return None
- def startswith_regex(pattern, text):
- """
- 尝试从字符串的[开头]匹配模式,如果匹配成功则返回True,否则返回False
- """
- match = re.match(pattern, text)
- if match:
- return True
- else:
- return False
- def extract_between(text, start, end):
- """
- 提取2个字符串之间的内容,返回一个列表
- :param text:
- :param start:
- :param end:
- :return:
- """
- pattern = re.escape(start) + r"(.*?)" + re.escape(end)
- matches = re.findall(pattern, text)
- if matches:
- return matches
- else:
- return []
- def findfirst_regex(pattern, text):
- """
- 在整个字符串中搜索匹配,如果找到则返回一个匹配对象,否则返回None
- """
- if not (pattern and text):
- return None
- search_result = re.search(pattern, text)
- if search_result:
- return search_result.group(0)
- return None
- # 使用re.findall查找所有匹配
- def findall_regex(pattern, text):
- """
- <td><a href="(.*?)" class="model-link inside">
- 根据正则表达式提取所有匹配的内容,返回一个列表
- """
- if not (pattern and text):
- return []
- result_list = re.findall(pattern, text)
- return result_list
- def replaceall_regex(pattern, replaceto, text):
- """
- 将正则表达式匹配到的内容替换为replaceto的内容,返回替换后的完整文本
- """
- if not (pattern and replaceto and text):
- return text
- new_text = re.sub(pattern, replaceto, text)
- return new_text
- def get_base_url_for_file(url):
- """
- return 末尾不包含/
- 引用方法:
- from 包名处(模块名称).文件名称 import 函数名称
- 包或者模块,是指含有__init__.py的文件夹
- """
- parsed_url = urlparse(url)
- base_url = urlunparse((parsed_url.scheme, parsed_url.netloc, '', '', '', ''))
- return base_url
- def get_files_in_path(path):
- """
- 获取某个路径中所有文件的绝对路径
- """
- file_list = []
- # 如果是文件,直接返回文件的绝对路径
- if os.path.isfile(path):
- return [os.path.abspath(path)]
- # 如果是目录,遍历目录及其子目录,返回所有文件的绝对路径列表
- if os.path.isdir(path):
- for root, dirs, files in os.walk(path):
- for file in files:
- file_path = os.path.join(root, file)
- file_list.append(os.path.abspath(file_path))
- return file_list
- def contains_any(string, keywords):
- """
- 判断string是否包含任何一个关键词
- """
- if isinstance(keywords, str):
- return keywords in string
- if isinstance(keywords, (list, set)):
- for keyword in keywords:
- if keyword in string:
- return True
- return False
- def print_all_str_vars(keywords_to_exclude=None):
- """
- 打印用户定义的所有字符串变量,可以设置关键词根据变量名进行排除
- """
- if keywords_to_exclude is None:
- keywords_to_exclude = {}
- all_variables = globals()
- for var_name, var_value in all_variables.items():
- if not isinstance(var_value, str):
- continue
- if var_name.startswith("__") and var_name.endswith("__"):
- continue
- if contains_any(var_name, keywords_to_exclude):
- continue
- else:
- print(var_value)
- def split_line(line):
- """
- 将一行字符串分割成多个部分,连续的tab和空格都当作一个分隔符
- testcase = "This\tis \t a\t\t test string\t"
- testcase = "aaa bbb"
- :param line:
- :return:
- """
- # parts = re.split(r'\s+|\t+', line)
- parts = line.split()
- # 字符串的split函数本身就有这样的能力
- return parts
- def md5(input_string):
- # 创建一个 MD5 哈希对象
- md5_hash = hashlib.md5()
- # 更新哈希对象以包含输入字符串的字节表示
- md5_hash.update(input_string.encode('utf-8'))
- # 获取 MD5 哈希值的十六进制表示
- md5_hex = md5_hash.hexdigest()
- return md5_hex
- def get_time_str():
- """
- 返回当前时间的字符串,常用于文件名
- :return:
- """
- current_time = datetime.datetime.now()
- formatted_time = current_time.strftime("%Y_%m_%d_%H_%M_%S_%f")[:-3]
- return formatted_time
- def run_external_program(command):
- """
- 注意,要执行的命令、脚本,基本都要求绝对路径
- :param command:
- :return:
- """
- try:
- # Run the external program and capture its output
- result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
- # Check if the command was successful (return code 0)
- if result.returncode == 0:
- # Return the standard output
- return result.stdout.strip()
- else:
- # If the command failed, print the error message
- print(f"Error: {result.stderr.strip()}")
- return None
- except Exception as e:
- print(f"An error occurred: {e}")
- return None
- def deduplicate_list(input_list):
- # 使用OrderedDict.fromkeys()去重,并保持原始顺序
- deduplicated_dict = OrderedDict.fromkeys(input_list)
- # 将字典的键转换回列表
- deduplicated_list = list(deduplicated_dict.keys())
- return deduplicated_list
- def is_hex_code(input_str):
- """
- hello -- 68656C6C6F
- :param input_str:
- :return:
- """
- return re.match(r"^[0-9a-fA-F]+$", input_str) is not None
- def hex_code_to_byte_array(hex_code):
- r"""
- 68656C6C6F --- b'hello'
- 1112136162 --- b'\x11\x12\x13ab'
- 可以打印的字符直接以字符表示,不行的用\x加code
- :param hex_code:
- :return:
- """
- return bytes.fromhex(hex_code)
- def byte_array_to_hex_code(byte_array):
- """
- 使用方法
- byte_array_to_hex_code(b"hello") -- bytes can only contain ASCII literal characters.
- byte_array_to_hex_code("hello中文".encode())
- :param byte_array:
- :return:
- """
- # 使用 binascii.hexlify 将字节数组转换为 hex code
- hex_code = binascii.hexlify(byte_array).decode()
- return hex_code
- def is_base64(input_str):
- try:
- base64.b64decode(input_str)
- return True
- except ValueError:
- return False
- def base64_encode(data):
- """
- 传入的参数可以是 str 或者 byte array格式
- encode --- str to byte array
- decode --- byte array to str
- :param data:
- :return:
- """
- if isinstance(data, str):
- data = data.encode()
- # 进行 Base64 编码
- encoded_data = base64.b64encode(data).decode()
- return encoded_data
- def base64_decode(data):
- """
- 解码后,如果转化为字符串就返回字符串,否则就返回byte[]
- :param data:
- :return:
- """
- # 尝试解码成字符串
- try:
- decoded_str = base64.b64decode(data).decode()
- return decoded_str
- except UnicodeDecodeError:
- # 解码成字符串失败,返回字节数组
- decoded_bytes = base64.b64decode(data)
- return decoded_bytes
- except Exception:
- return None
- def get_files_in_dir(directory, extensions=None, include_subdir=True):
- """
- 获取目录下的所有文件。
- 参数:
- - directory: 目标目录的路径。
- - extensions: 文件后缀过滤列表,例如 ['.txt', '.pdf'],默认为 None。
- - include_subdir: 是否遍历子目录,True 为遍历,False 为不遍历,默认为 True。
- 返回:
- 包含所有文件路径的列表。
- """
- files = []
- extensions = tuple(extensions) if extensions else None
- def is_valid_file(filename):
- return extensions is None or filename.endswith(extensions)
- if include_subdir:
- # 遍历目录及其子目录
- for root, _, filenames in os.walk(directory):
- for filename in filenames:
- if is_valid_file(filename):
- files.append(os.path.join(root, filename))
- else:
- # 不遍历子目录,直接获取目录下的文件列表
- files = [os.path.join(directory, filename) for filename in os.listdir(directory)
- if os.path.isfile(os.path.join(directory, filename)) and is_valid_file(filename)]
- return files
|