title : "Time and Memory Profile in Python"
author : "Kent Chiu"
date : 2023-05-01
categories: ["Software"]
tags : ["Python"]

The code example below shows how to track:

  1. the global time profile of all functions

  2. the line-by-line time/memory profile of a single function, and the memory usage of its output

import sys 
from memory_profiler import *
from functools import partial
import pandas as pd 
import numpy as np 
import inspect
from io import StringIO 


from pyinstrument import Profiler

import cProfile 
from line_profiler import LineProfiler
from collections.abc import Mapping, Container
import logging
from functools import wraps


def dfs_getsizeof(obj, ids=None):
    """Return the deep size of ``obj`` in bytes.

    ``sys.getsizeof`` only reports the shallow size of an object, so this
    walks the object graph depth-first and adds up the shallow size of
    every distinct object reachable through mappings and containers.

    Args:
        obj: The Python object to measure.
        ids: Set of ``id()`` values of objects that were already counted.
            The recursion uses it so that shared or self-referencing
            objects are counted once; callers normally leave it as
            ``None`` to start a fresh traversal.

    Returns:
        The accumulated size in bytes, or ``0`` when ``obj`` was already
        counted during this traversal.
    """
    # A fresh set per top-level call: a mutable default would be shared by
    # every call and make previously seen objects report a size of 0.
    if ids is None:
        ids = set()
    if id(obj) in ids:
        return 0
    ids.add(id(obj))
    res = sys.getsizeof(obj)
    # Strings and bytes-like objects are containers as well, but their
    # shallow size is all we want, so do not descend into them.
    if isinstance(obj, (str, bytes, bytearray)):
        return res
    if isinstance(obj, Mapping):
        return res + sum(
            dfs_getsizeof(k, ids) + dfs_getsizeof(v, ids)
            for k, v in obj.items())
    if isinstance(obj, Container):
        try:
            members = iter(obj)
        except TypeError:
            # ``Container`` only promises ``in`` support; there is nothing
            # to walk when the object cannot be iterated.
            return res
        return res + sum(dfs_getsizeof(x, ids) for x in members)
    return res

def dec_memory_usage(measure_peak_mem=False, logger=None):
    """Build a decorator that reports the memory footprint of a call.

    After each call of the decorated function a short report is emitted
    with the function name, the deep size of the returned object and,
    optionally, the peak memory usage measured while the function ran.

    Args:
        measure_peak_mem: When true, run the function through
            ``memory_usage`` to also record its peak memory usage.
        logger: Object with a ``warning`` method that receives the report
            lines. When ``None`` the lines are printed instead.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.
    """
    # ep: divisor turning bytes into MB; round_n: digits kept in the report.
    ep, round_n = 10**6, 2

    def _dec(func):
        @wraps(func)
        def _inner(*args, **kwargs):
            # Built per call so one call's entries never leak into another.
            log_msg = {'Function Name': func.__name__}
            if measure_peak_mem:
                mem_usage, output = memory_usage(
                    partial(func, *args, **kwargs), max_usage=True, retval=True)
                log_msg['Peak Memory Usage (MB)'] = round(mem_usage, round_n)
            else:
                output = func(*args, **kwargs)

            # Pass an explicit empty set so every measurement starts a new
            # traversal, whatever default the helper uses.
            log_msg['Output Memory Usage (MB)'] = round(
                dfs_getsizeof(output, set()) / ep, round_n)

            for k, v in log_msg.items():
                msg = f'* {k} : {v}'
                if logger is not None:
                    logger.warning(msg)
                else:
                    print(msg)
            # Hand the wrapped function's result back to the caller.
            return output
        return _inner
    return _dec

def dec_memo_profile(func=None, logger=None, precision=1, backend='psutil'):
    """Decorator that runs the function and reports a line-by-line memory profile.

    Works both bare (``@dec_memo_profile``) and with arguments
    (``@dec_memo_profile(logger=...)``).

    Args:
        func: Function to profile. ``None`` when the decorator is used with
            arguments, in which case a decorator is returned.
        logger: Destination for the report, passed to ``show_results`` as
            its ``stream``. It needs a ``write`` method; when it only has
            ``warning``, that method is attached to it as ``write``.
            ``None`` is forwarded unchanged.
        precision: Forwarded to ``show_results``.
        backend: Memory backend name, resolved through ``choose_backend``.

    Returns:
        The profiling wrapper around ``func``, or a decorator when ``func``
        is ``None``.

    Raises:
        ValueError: If ``logger`` has neither ``write`` nor ``warning``.
    """
    # Function-scope imports: the module-level name ``LineProfiler`` is
    # rebound by the later ``line_profiler`` import, and ``tracemalloc`` is
    # otherwise only in scope if the star import happens to supply it.
    import tracemalloc
    from memory_profiler import LineProfiler as MemoryLineProfiler

    backend = choose_backend(backend)
    if logger is not None and not hasattr(logger, 'write'):
        if not hasattr(logger, 'warning'):
            raise ValueError('your logger must have write function')
        # Let logging-style objects act as the report stream.
        logger.write = logger.warning

    if backend == 'tracemalloc' and not tracemalloc.is_tracing():
        tracemalloc.start()

    def _decorate(target):
        """Wrap ``target`` so each call is profiled and then reported."""
        get_prof = partial(MemoryLineProfiler, backend=backend)
        show_results_bound = partial(
            show_results, stream=logger, precision=precision
        )
        if inspect.iscoroutinefunction(target):
            # Native coroutine wrapper; generator-based ``asyncio.coroutine``
            # is no longer available on current Python versions.
            @wraps(target)
            async def wrapper(*args, **kwargs):
                prof = get_prof()
                val = await prof(target)(*args, **kwargs)
                show_results_bound(prof)
                return val
        else:
            @wraps(target)
            def wrapper(*args, **kwargs):
                prof = get_prof()
                val = prof(target)(*args, **kwargs)
                show_results_bound(prof)
                return val

        return wrapper

    if func is not None:
        return _decorate(func)
    return _decorate

def dec_retrun_pyinstrument(logger=None):
    """Build a decorator that reports a whole-call run time profile.

    The decorated function is executed under a pyinstrument ``Profiler``
    and the text report is emitted once the call has finished.

    Args:
        logger: Object with a ``warning`` method that receives the report.
            When ``None`` the report is printed instead.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.
    """
    def _dec(func):
        @wraps(func)
        def _inner(*args, **kwargs):
            # One profiler per call, so a report never carries samples
            # collected during an earlier call of the same function.
            p = Profiler(async_mode='disabled')
            with p:
                result = func(*args, **kwargs)

            report = p.output_text(
                unicode=True,
                color=True,
                show_all=False,
                timeline=False,
            )
            # ``logger`` defaults to None; fall back to stdout in that case.
            if logger is not None:
                logger.warning(report)
            else:
                print(report)
            return result
        return _inner
    return _dec

def dec_return_lineprofiler(logger=None):
    """Build a decorator that reports a line-by-line run time profile.

    The decorated function is registered with a ``LineProfiler``; after
    every successful call the profiler's statistics are rendered to text
    and emitted. The same profiler instance is reused for all calls of the
    decorated function.

    Args:
        logger: Object with a ``warning`` method that receives the report.
            When ``None`` the report is printed instead.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.
    """
    def _dec(func):
        lp = LineProfiler()
        # Register the target once, at decoration time, rather than again
        # on every call.
        lp.add_function(func)

        @wraps(func)
        def _inner(*args, **kwargs):
            lp.enable()
            try:
                res = func(*args, **kwargs)
            finally:
                # Always switch the profiler off again, even when the
                # wrapped function raises.
                lp.disable()

            txt_io = StringIO()
            lp.print_stats(stream=txt_io)
            report = txt_io.getvalue()
            # ``logger`` defaults to None; fall back to stdout in that case.
            if logger is not None:
                logger.warning(report)
            else:
                print(report)

            return res
        return _inner
    return _dec


@dec_memory_usage(logger=logging)
def example_1(x):
    """Return a list of ``x`` one-element lists, each holding ``range(i)``."""
    return [[range(i)] for i in range(x)]

@dec_memo_profile(logger=logging)
def example_2(x):
    """Return a DataFrame built from ``x`` one-element rows of ``range(i)``."""
    rows = [[range(n)] for n in range(x)]
    return pd.DataFrame(rows)

#@dec_memory_usage(logger=logging)
@dec_return_lineprofiler(logger=logging)
def example_3(x):
    """Return the text ``'<i>,'`` repeated ``i`` times for each ``i`` below ``x``.

    The string is grown piece by piece inside the loop so the line
    profiler has a per-line workload to report on.
    """
    text = ''
    for n in range(x):
        chunk = f'{n},' * n
        text = text + chunk
    return text

@dec_retrun_pyinstrument(logger=logging)
def main():
    """Run the three example workloads under the run time profiler."""
    size = 1000
    example_1(x=size)
    example_2(size)
    example_3(x=size)

if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()