什么?你不知道怎么读取文件结尾的N个字符?

前言

话说有这么个事,有个日志文件很大,需要在前端展示最后N个字符,类似于Linux的tail命令。一开始文件是用aiofiles读的:每次都用read()读取全部内容,然后用切片截取最后10000个字符。后来发现有时会报错MemoryError。查了一下发现aiofiles并不是真正的异步,只是把阻塞的读文件操作放到另一个线程里执行,因为epoll不支持本地文件IO,操作系统读取本地文件只有阻塞这一种方式。所以最后就放弃了aiofiles的方案,改为使用seek:先定位到文件末尾,再定位到"末尾位置减去N"处,然后读取即可。(注意:seek的偏移量按字节计算,所以严格来说取到的是最后N个字节。)

代码

    def read_last_n_of_file(self, file_name, max_len):
        """Return the tail of a UTF-8 text file as a string.

        The file is read in binary mode because seek offsets are byte
        offsets: seeking a text-mode file to an arbitrary position is not
        supported and can land in the middle of a multibyte character,
        which makes the subsequent read raise ``UnicodeDecodeError``.

        Args:
            file_name: Path of the file to read.
            max_len: Maximum number of trailing *bytes* to read. ``0`` means
                the whole file.

        Returns:
            The decoded tail. A character cut in half at the start of the
            window is dropped, undecodable bytes become U+FFFD, and
            ``\\r\\n`` / ``\\r`` are normalised to ``\\n`` (as text mode did).

        Raises:
            FileNotFoundError: If ``file_name`` does not exist.
        """
        with open(file_name, "rb") as f:
            # whence=2 is os.SEEK_END; in binary mode seek() returns the
            # new absolute position, i.e. the file size in bytes.
            size = f.seek(0, 2)
            if max_len == 0:
                max_len = size
            # Clamp at 0: seeking to a negative absolute offset is an error.
            position = max(size - max_len, 0)
            f.seek(position, 0)  # whence=0 is os.SEEK_SET
            tail = f.read()

        if position > 0:
            # The window may start inside a multibyte character. UTF-8
            # continuation bytes look like 0b10xxxxxx and a character has at
            # most three of them, so skip up to three at the very start.
            skip = 0
            while skip < min(3, len(tail)) and (tail[skip] & 0xC0) == 0x80:
                skip += 1
            tail = tail[skip:]

        text = tail.decode("utf-8", errors="replace")
        # Binary mode does no newline translation; reproduce the universal
        # newline handling the previous text-mode read provided.
        return text.replace("\r\n", "\n").replace("\r", "\n")

附上seek的用法解析

file.seek(offset,position)
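  1. offset: how many bytes to move relative to the reference position (e.g. 1 means one byte forward; a negative value moves backward). For files opened in text mode only offsets returned by tell(), or zero, are valid.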

  2. position: the reference point that offset is measured from, i.e. where the movement of the file read/write cursor starts.

    • 0 means starting of file
    • 1 means current position of file read/write cursor
    • 2 means end of file

f.seek(-1, 2) means go to the end of the file and move back one byte (a non-zero offset relative to the end is only allowed when the file is opened in binary mode)

print(f.read()) prints the content read from the position that seek moved the cursor to, up to the end of the file

附赠一下后端tornado和前端websocket的代码

from tornado.iostream import StreamClosedError
from tornado.web import RequestHandler
from tornado.websocket import WebSocketHandler, WebSocketClosedError

class WSViewFileHandler(WebSocketHandler):
    """Stream the tail of a log file to a browser over a WebSocket.

    On connect the handler sends ``"OK"``. The client then sends one JSON
    message with ``file_name``, an optional ``max_len`` and an optional
    ``close`` flag. The handler replies with the tail of the file and,
    unless ``close`` is truthy, keeps polling the file and pushes the tail
    again whenever it differs from what was sent last.
    """

    # Set to True by on_close() so the polling loop in on_message() can stop
    # even when the file no longer changes (otherwise it would poll forever
    # after the client has gone away).
    _client_gone = False

    def check_origin(self, origin):
        """Accept WebSocket connections from every origin."""
        # NOTE(review): returning True unconditionally accepts cross-origin
        # connections; confirm this is intended for the deployment.
        return True

    def open(self, *args, **kwargs):
        """Greet the client; it answers "OK" with its JSON request."""
        self.write_message("OK")

    async def on_message(self, message):
        """Handle the client's request and stream the file tail.

        Args:
            message: JSON text with the keys ``file_name`` (must end in
                ``.log`` and contain at least ``<date>/<name>``), ``max_len``
                (tail size in bytes, ``0`` for the whole file; falls back to
                ``VIEW_LOG_DEFAULT_MAX_LEN`` when missing or not a number)
                and ``close`` (truthy: send the tail once, then close).
        """
        logger.info(f"message={message}")
        # json.loads() no longer accepts an ``encoding`` keyword since
        # Python 3.9; passing it raises TypeError there.
        j = json.loads(message)

        requested = j.get("file_name")
        if not isinstance(requested, str):
            requested = ""
        try:
            max_len = int(j.get("max_len", VIEW_LOG_DEFAULT_MAX_LEN))
        except (TypeError, ValueError):
            # e.g. "" or null sent by the client: use the default size.
            max_len = int(VIEW_LOG_DEFAULT_MAX_LEN)

        parts = requested.split("/")
        # Only "<date>/<name>.log" style paths are served. A ".." directory
        # component is refused so the request cannot leave the logs folder.
        is_allowed = (
            requested.endswith(".log")
            and len(parts) >= 2
            and parts[-2] != ".."
        )
        if not is_allowed:
            await self.write_message("<h1>目前只支持查看日志文件,不支持查看其它文件</h1>")
            self.close()
            # Stop here: falling through would try to read the rejected path.
            return

        date = parts[-2]
        log_file = quote_plus(parts[-1])
        file_name = os.path.join(project_dir, "logs", date, log_file)

        close = j.get("close")
        # Cache of the last message sent; an unchanged tail is not re-sent.
        last_message = ""
        logger.info(f"file_name={file_name}")
        SLEEP_SECONDS = 0.1
        try:
            while True:
                new_message = self.read_last_n_of_file(file_name, max_len)
                if new_message != last_message:
                    await self.write_message(new_message)
                    last_message = new_message
                await asyncio.sleep(SLEEP_SECONDS)
                if close or self._client_gone:
                    break
            if close:
                self.close()
        except (WebSocketClosedError, StreamClosedError):
            # The client disconnected while we were writing; nothing to do.
            pass
        except FileNotFoundError:
            await self.write_message(f"<h1>文件{file_name}不存在</h1>")
            self.close()

    def read_last_n_of_file(self, file_name, max_len):
        """Return the tail of a UTF-8 text file as a string.

        The file is read in binary mode because seek offsets are byte
        offsets: seeking a text-mode file to an arbitrary position is not
        supported and can land in the middle of a multibyte character,
        which makes the subsequent read raise ``UnicodeDecodeError``.

        Args:
            file_name: Path of the file to read.
            max_len: Maximum number of trailing *bytes* to read. ``0`` means
                the whole file.

        Returns:
            The decoded tail. A character cut in half at the start of the
            window is dropped, undecodable bytes become U+FFFD, and
            ``\\r\\n`` / ``\\r`` are normalised to ``\\n`` (as text mode did).

        Raises:
            FileNotFoundError: If ``file_name`` does not exist.
        """
        with open(file_name, "rb") as f:
            # whence=2 is os.SEEK_END; in binary mode seek() returns the
            # new absolute position, i.e. the file size in bytes.
            size = f.seek(0, 2)
            if max_len == 0:
                max_len = size
            # Clamp at 0: seeking to a negative absolute offset is an error.
            position = max(size - max_len, 0)
            f.seek(position, 0)  # whence=0 is os.SEEK_SET
            tail = f.read()

        if position > 0:
            # The window may start inside a multibyte character. UTF-8
            # continuation bytes look like 0b10xxxxxx and a character has at
            # most three of them, so skip up to three at the very start.
            skip = 0
            while skip < min(3, len(tail)) and (tail[skip] & 0xC0) == 0x80:
                skip += 1
            tail = tail[skip:]

        text = tail.decode("utf-8", errors="replace")
        # Binary mode does no newline translation; reproduce the universal
        # newline handling the previous text-mode read provided.
        return text.replace("\r\n", "\n").replace("\r", "\n")

    def on_close(self):
        """Record that the client is gone so the polling loop terminates."""
        self._client_gone = True
        logger.info("关闭")

<!-- Page fragment for the live file preview: two headings plus the output area. -->
<div style="padding: 15px;">
    <h2 style="color: deeppink;">文件实时预览:</h2><br>
    <h2 style="color: #0bc35b;">节点: </h2><br>
    <!-- Output area: the page script looks this element up by its id
         ("file_content") and writes every received WebSocket message into it. -->
    <pre id="file_content"
         style="background:#333;color:#aaa;padding:20px;"></pre>
</div>

<script>
    var ws;
    if (ws) {
        ws.close();
    }
    ws = new WebSocket("ws://:8005/ws/view_file");
    document.getElementById("file_content").innerHTML = "正在建立连接, 请稍等...";
    ws.onmessage = function (e) {
        if (e.data === 'OK') {
            document.getElementById("file_content").innerHTML = "连接建立成功, 正在等待返回数据...";
            ws.send(JSON.stringify({
                "file_name": "",
                "max_len": "",
                "close": false
            }));
        } else {
            let string = e.data;
            // string = string.replace(/\r\n/g, "<br>");
            // string = string.replace(/\n/g, "<br>");
            document.getElementById("file_content").innerHTML = string;
            window.scrollTo(0, document.body.scrollHeight);
            var l = string.length;
            document.title = '日志浏览(' + l + '字)';
            // window.scrollTo(0,document.querySelector("#file_content").scrollHeight);
            // var element = document.getElementById("file_content");
            // element.scrollTop = element.scrollHeight;


        }
    };
    window.onbeforeunload = function (e) {
        ws.close();
    };

</script>