\documentclass[14pt]{beamer} \usepackage[latin1]{inputenc} % settings for code snippets \usepackage{listings} \usepackage{fancyvrb} \lstset{language=Python, basicstyle=\footnotesize\ttfamily, frame=none, stringstyle=\color{blue}, fancyvrb=true, xleftmargin=2pt,xrightmargin=2pt, showstringspaces=false} \usetheme{Boadilla} %\usetheme{Warsaw} \setbeamercovered{transparent} \setbeamertemplate{navigation symbols}{} \title[Effective Python for Statically typed VM]{Towards a More Effective Implementation of Python on Top of Statically Typed Virtual Machines} \author[Antonio Cuni]{Antonio Cuni\\\small{Thesis advisor: Davide Ancona}} \institute[DISI]{DISI - Università degli studi di Genova} \date{December 13, 2007} % outline \AtBeginSection[] { \begin{frame} \frametitle{Outline} \small \tableofcontents[currentsection,hideothersubsections] \normalsize \end{frame} } \newcommand{\Any}{\texttt{Any}} \newcommand{\Case}{\texttt{Case}} \begin{document} \begin{frame} \titlepage \end{frame} \section{Introduction} \begin{frame} \frametitle{Overview of Python} \begin{itemize} \item Very high level language \item Dynamically typed \item Growing interest \item Current implementations are slow \begin{itemize} \item CPython \item IronPython \item Jython \item PyPy \end{itemize} \item We will focus on .NET and JVM \end{itemize} \end{frame} \section{The problem} \begin{frame} \frametitle{Why Python is slow?} \begin{itemize} \item Interpretation overhead \item Boxed arithmetic / overflow handling \item Extreme introspection and reflection \item Dynamic lookup of methods and attributes \item Dynamic dispatch of operations \end{itemize} \end{frame} \begin{frame} \frametitle{Interpretation overhead} \begin{itemize} \item Python source code $\rightarrow$ Python bytecode \item Bytecode $\rightarrow$ Virtual Machine \item Roughly 2.5x overhead \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Boxed arithmetic / overflow handling} \begin{itemize} \item Everything is an object \item Numbers are objects: boxed representation \item Automatic switch to arbitrary precision numbers \item Simple benchmark: 40 times slower than C \end{itemize} \pause \begin{exampleblock}{Benchmark} \begin{lstlisting} i = 0 while i < 10000000: i = i+1 \end{lstlisting} \end{exampleblock} \end{frame} \begin{frame} \frametitle{Introspection and reflection} \begin{itemize} \item Inspect a running program \begin{itemize} \item Find out the methods and attributes of an instance \item Find out the names of the variables defined in the current scope \item ... \end{itemize} \item Modify a running program \begin{itemize} \item Change the class of a live object \item ... \end{itemize} \item Access to execution frames \begin{itemize} \item Callee can change the local variables of the caller \end{itemize} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Dynamic lookup of methods and attributes} \begin{exampleblock}{Example} \begin{lstlisting} # an empty class class MyClass: pass def foo(target, flag): if flag: target.x = 42 obj = MyClass() foo(obj, True) print obj.x print getattr(obj, "x") \end{lstlisting} \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Dynamic dispatch of operations} \begin{itemize} \item Opcode for addition: BINARY\_ADD \item Dynamically overloaded (for numbers, strings, list, etc.) \item Dispatch at run-time \end{itemize} \pause \begin{exampleblock}{Benchmark} \begin{lstlisting} i = 0 while i < 10000000: i = i+1 \end{lstlisting} \end{exampleblock} \end{frame} \section{Towards a possible solution} \begin{frame}[fragile] \frametitle{From Python bytecode to machine code} \begin{columns} \begin{column}{0.45\textwidth} \begin{exampleblock}{Python source} \begin{lstlisting} def add(a, b): return a+b \end{lstlisting} \end{exampleblock} \end{column} \pause \begin{column}{0.45\textwidth} \begin{exampleblock}{Bytecode} \begin{lstlisting} LOAD_FAST 0 (a) LOAD_FAST 1 (b) BINARY_ADD RETURN_VALUE \end{lstlisting} \end{exampleblock} \end{column} \end{columns} \pause \begin{exampleblock}{Translated code} \begin{lstlisting} public static W_Root add(W_Root a, W_Root b) { return Operations.BINARY_ADD(a, b); } \end{lstlisting} \end{exampleblock} \end{frame} \begin{frame} \frametitle{Reusing PyPy} \begin{itemize} \item Python is not easy to implement \item PyPy is already very compliant to CPython \item Our goal: adapt PyPy to our needs \end{itemize} \end{frame} \begin{frame} \frametitle{PyPy architecture} \begin{center} \includegraphics[scale=0.35]{arch.pdf} \end{center} \end{frame} \begin{frame}[fragile] \frametitle{Fast paths} \begin{exampleblock}{Python source} \begin{lstlisting} def fn(a, b): return (a+b)*(a-b) \end{lstlisting} \end{exampleblock} \pause \begin{exampleblock}{Unoptimized translation} \begin{lstlisting} public static W_Root fn(W_Root a, W_Root b) { W_Root tmp1 = Operations.BINARY_ADD(a, b); W_Root tmp2 = Operations.BINARY_SUB(a, b); return Operations.BINARY_MUL(tmp1, tmp2); } \end{lstlisting} \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Fast paths} \begin{exampleblock}{Optimized translation} \begin{lstlisting} public static W_Root fn(W_Root a, W_Root b) { if (a.GetType() == typeof(W_Integer) && b.GetType() == typeof(W_Integer)) { int aa = a.ToInt(); int bb = b.ToInt(); return new W_Integer(fn_fast(aa, bb)); } else { W_Root tmp1 = Operations.BINARY_ADD(a, b); W_Root tmp2 = Operations.BINARY_SUB(a, b); return Operations.BINARY_MUL(tmp1, tmp2); } } public static int fn_fast(int a, int b) { return (a+b)*(a-b); } \end{lstlisting} \end{exampleblock} \end{frame} \section{A Type System for Python} \begin{frame} \frametitle{General goals} \begin{itemize} \item Speed \item Type safety \textbf{is not} a goal \item The type system drives the code generator \item Emphasis on \emph{fast paths} \item Incomplete/extensible \end{itemize} \end{frame} \begin{frame} \frametitle{\Any\ and \Case} \begin{alertblock}{\Any} \begin{itemize} \item \Any\ is the most generic type \item By default, all expressions are annotated with \Any \item Operations on expressions of type \Any\ are slow \end{itemize} \end{alertblock} \pause \begin{alertblock}{\Case} \begin{itemize} \item The \Case\ operator combines two or more types \item The code generator emits different paths for each case \item e.g. \Case\texttt{(A, Any)} \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Other types} \begin{alertblock}{Numeric types} \begin{itemize} \item \texttt{int}, \texttt{long}, \texttt{float} \item Most integer operations: \Case\texttt{(int, long)} \end{itemize} \end{alertblock} \pause \begin{alertblock}{Built-in types} \begin{itemize} \item \texttt{list}, \texttt{tuple}, \texttt{dict} \item Parametric on the type of the items \item Immutable \end{itemize} \end{alertblock} \end{frame} \begin{frame}[fragile] \frametitle{User defined types} \begin{itemize} \item Structural subtyping \item Type can change at run-time \item Types are inferred by how objects are used \end{itemize} \pause \begin{exampleblock}{Infer type} \begin{lstlisting} def read_data(source): data = source.read() source.close() return data \end{lstlisting} \end{exampleblock} \end{frame} \begin{frame} \frametitle{Function types} \begin{itemize} \item Functions are first-order values \item Calling convention is complex (see example) \item Arrow type for simple functions: $A*B \rightarrow C$ \item \Any\ type for more complex functions \item Should still give a considerable speed-up \end{itemize} \end{frame} \section{Type assignment} \begin{frame} \frametitle{Possible approaches} \begin{itemize} \item Static type inference \item Optional annotations \item Type feedbacks \item Just In Time code generation \item Combination of those \end{itemize} \end{frame} \begin{frame} \frametitle{Static type inference} \begin{itemize} \item Already tried in Python (e.g. Starkiller) \item Cartesian Product Algorithm \item Optional annotations to help the engine \item CPA gives good annotations \item Previous works gave bad results on the code generation side \item PyPy will help here! \end{itemize} \end{frame} \begin{frame} \frametitle{Type feedbacks} \begin{itemize} \item Run the program \item Observe what happens \item Record the types of the expressions \item Cannot be precise, by definition \item Types will always be like \Case\texttt{(A, Any)} \item Need to execute the program first \end{itemize} \end{frame} \begin{frame} \frametitle{Just In Type code generation} \begin{itemize} \item Wait until we know the precise types, at run-time \item Generate very fast code, exploiting all the information needed \item Nobody ever tried that on JVM or .NET \item JIT-over-JIT \end{itemize} \end{frame} \section{Workplan} \begin{frame} \frametitle{Workplan} \begin{enumerate} \item CLI or JVM backend for PyPy JIT (maybe partial) \item Adapt the JIT into an AOT \item Implement a fast code generator \item Type assignment \begin{itemize} \item Manual annotations \item Type inference engine \item Type feedbacks engine \item Complete the JIT backend \end{itemize} \end{enumerate} \end{frame} \begin{frame} \frametitle{Thank you} \begin{itemize} \item Thank you for the attention \item Any questions? \end{itemize} \end{frame} \end{document}