Plan 9 from Bell Labs’s /usr/web/sources/contrib/anothy/src/ctags/python.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


/*
*   $Id: python.c 567 2007-06-24 01:20:20Z elliotth $
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Python language
*   files.
*/
/*
*   INCLUDE FILES
*/
#include "general.h"  /* must always come first */

#include <string.h>

#include "entry.h"
#include "options.h"
#include "read.h"
#include "routines.h"
#include "vstring.h"

/*
*   DATA DEFINITIONS
*/
typedef enum {
	K_CLASS, K_FUNCTION, K_MEMBER
} pythonKind;

static kindOption PythonKinds[] = {
	{TRUE, 'c', "class",    "classes"},
	{TRUE, 'f', "function", "functions"},
	{TRUE, 'm', "member",   "class members"}
};

typedef struct NestingLevel NestingLevel;
typedef struct NestingLevels NestingLevels;

struct NestingLevel
{
	int indentation;
	vString *name;
	boolean is_class;
};

struct NestingLevels
{
	NestingLevel *levels;
	int n;
	int allocated;
};

/*
*   FUNCTION DEFINITIONS
*/

#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])

static boolean isIdentifierFirstCharacter (int c)
{
	return (boolean) (isalpha (c) || c == '_');
}

static boolean isIdentifierCharacter (int c)
{
	return (boolean) (isalnum (c) || c == '_');
}

/* Given a string with the contents of a line directly after the "def" keyword,
 * extract all relevant information and create a tag.
 */
static void makeFunctionTag (vString *const function,
	vString *const parent, int is_class_parent)
{
	tagEntryInfo tag;
	initTagEntry (&tag, vStringValue (function));
	
	tag.kindName = "function";
	tag.kind = 'f';

	if (vStringLength (parent) > 0)
	{
		if (is_class_parent)
		{
			tag.kindName = "member";
			tag.kind = 'm';
			tag.extensionFields.scope [0] = "class";
			tag.extensionFields.scope [1] = vStringValue (parent);
		}
		else
		{
			tag.extensionFields.scope [0] = "function";
			tag.extensionFields.scope [1] = vStringValue (parent);
		}
	}

	/* If a function starts with __, we mark it as file scope.
	 * FIXME: What is the proper way to signal such attributes?
	 * TODO: What does functions/classes starting with _ and __ mean in python?
	 */
	if (strncmp (vStringValue (function), "__", 2) == 0 &&
		strcmp (vStringValue (function), "__init__") != 0)
	{
		tag.extensionFields.access = "private";
		tag.isFileScope = TRUE;
	}
	else
	{
		tag.extensionFields.access = "public";
	}
	makeTagEntry (&tag);
}

/* Given a string with the contents of the line directly after the "class"
 * keyword, extract all necessary information and create a tag.
 */
static void makeClassTag (vString *const class, vString *const inheritance,
	vString *const parent, int is_class_parent)
{
	tagEntryInfo tag;
	initTagEntry (&tag, vStringValue (class));
	tag.kindName = "class";
	tag.kind = 'c';
	if (vStringLength (parent) > 0)
	{
		if (is_class_parent)
		{
			tag.extensionFields.scope [0] = "class";
			tag.extensionFields.scope [1] = vStringValue (parent);
		}
		else
		{
			tag.extensionFields.scope [0] = "function";
			tag.extensionFields.scope [1] = vStringValue (parent);
		}
	}
	tag.extensionFields.inheritance = vStringValue (inheritance);
	makeTagEntry (&tag);
}

/* Skip a single or double quoted string. */
static const char *skipString (const char *cp)
{
	const char *start = cp;
	int escaped = 0;
	for (cp++; *cp; cp++)
	{
		if (escaped)
			escaped--;
		else if (*cp == '\\')
			escaped++;
		else if (*cp == *start)
			return cp + 1;
	}
	return cp;
}

/* Skip everything up to an identifier start. */
static const char *skipEverything (const char *cp)
{
	for (; *cp; cp++)
	{
		if (isIdentifierFirstCharacter ((int) *cp))
			return cp;
		if (*cp == '"' || *cp == '\'')
		{
			cp = skipString(cp);
		}
    }
    return cp;
}

/* Skip an identifier. */
static const char *skipIdentifier (const char *cp)
{
	while (isIdentifierCharacter ((int) *cp))
		cp++;
    return cp;
}

static const char *findDefinitionOrClass (const char *cp)
{
	while (*cp)
	{
		cp = skipEverything (cp);
		if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5))
		{
			return cp;
		}
		cp = skipIdentifier (cp);
	}
	return NULL;
}

static const char *skipSpace (const char *cp)
{
	while (isspace ((int) *cp))
		++cp;
	return cp;
}

/* Starting at ''cp'', parse an identifier into ''identifier''. */
static const char *parseIdentifier (const char *cp, vString *const identifier)
{
	vStringClear (identifier);
	while (isIdentifierCharacter ((int) *cp))
	{
		vStringPut (identifier, (int) *cp);
		++cp;
	}
	vStringTerminate (identifier);
	return cp;
}

static void parseClass (const char *cp, vString *const class,
	vString *const parent, int is_class_parent)
{
	vString *const inheritance = vStringNew ();
	vStringClear (inheritance);
	cp = parseIdentifier (cp, class);
	cp = skipSpace (cp);
	if (*cp == '(')
	{
		++cp;
		while (*cp != ')')
		{
			if (*cp == '\0')
			{
				/* Closing parenthesis can be in follow up line. */
				cp = (const char *) fileReadLine ();
				if (!cp) break;
				vStringPut (inheritance, ' ');
				continue;
			}
			vStringPut (inheritance, *cp);
			++cp;
		}
		vStringTerminate (inheritance);
	}
	makeClassTag (class, inheritance, parent, is_class_parent);
	vStringDelete (inheritance);
}

static void parseFunction (const char *cp, vString *const def,
	vString *const parent, int is_class_parent)
{
	cp = parseIdentifier (cp, def);
	makeFunctionTag (def, parent, is_class_parent);
}

/* Get the combined name of a nested symbol. Classes are separated with ".",
 * functions with "/". For example this code:
 * class MyClass:
 *     def myFunction:
 *         def SubFunction:
 *             class SubClass:
 *                 def Method:
 *                     pass
 * Would produce this string:
 * MyClass.MyFunction/SubFunction/SubClass.Method
 */
static boolean constructParentString(NestingLevels *nls, int indent,
	vString *result)
{
	int i;
	NestingLevel *prev = NULL;
	int is_class = FALSE;
	vStringClear (result);
	for (i = 0; i < nls->n; i++)
	{
		NestingLevel *nl = nls->levels + i;
		if (indent <= nl->indentation)
			break;
		if (prev)
		{
			if (prev->is_class)
				vStringCatS(result, ".");
			else
				vStringCatS(result, "/");
		}
		vStringCat(result, nl->name);
		is_class = nl->is_class;
		prev = nl;
	}
	return is_class;
}

static NestingLevels *newNestingLevels(void)
{
	NestingLevels *nls = xCalloc (1, NestingLevels);
	return nls;
}

static void freeNestingLevels(NestingLevels *nls)
{
	int i;
	for (i = 0; i < nls->allocated; i++)
		vStringDelete(nls->levels[i].name);
	if (nls->levels) eFree(nls->levels);
	eFree(nls);
}

/* TODO: This is totally out of place in python.c, but strlist.h is not usable.
 * Maybe should just move these three functions to a separate file, even if no
 * other parser uses them.
 */
static void addNestingLevel(NestingLevels *nls, int indentation,
	vString *name, boolean is_class)
{
	int i;
	NestingLevel *nl = NULL;

	for (i = 0; i < nls->n; i++)
	{
		nl = nls->levels + i;
		if (indentation <= nl->indentation) break;
	}
	if (i == nls->n)
	{
		if (i >= nls->allocated)
		{
			nls->allocated++;
			nls->levels = xRealloc(nls->levels,
				nls->allocated, NestingLevel);
			nls->levels[i].name = vStringNew();
		}
		nl = nls->levels + i;
	}
	nls->n = i + 1;

	vStringCopy(nl->name, name);
	nl->indentation = indentation;
	nl->is_class = is_class;
}

static void findPythonTags (void)
{
	vString *const continuation = vStringNew ();
	vString *const name = vStringNew ();
	vString *const parent = vStringNew();

	NestingLevels *const nesting_levels = newNestingLevels();

	const char *line;
	int line_skip = 0;
	boolean longStringLiteral = FALSE;

	while ((line = (const char *) fileReadLine ()) != NULL)
	{
		const char *cp = line;
		char *longstring;
		const char *keyword;
		int indent;

		cp = skipSpace (cp);

		if (*cp == '#' || *cp == '\0')  /* skip comment or blank line */
			continue;

		/* Deal with line continuation. */
		if (!line_skip) vStringClear(continuation);
		vStringCatS(continuation, line);
		vStringStripTrailing(continuation);
		if (vStringLast(continuation) == '\\')
		{
			vStringChop(continuation);
			vStringCatS(continuation, " ");
			line_skip = 1;
			continue;
		}
		cp = line = vStringValue(continuation);
		cp = skipSpace (cp);
		indent = cp - line;
		line_skip = 0;

		/* Deal with multiline string ending. */
		if (longStringLiteral)
		{
			/* Note: We do ignore anything in the same line after a multiline
			 * string for now.
			 */
			if (strstr (cp, "\"\"\""))
				longStringLiteral = FALSE;
			continue;
		}
		
		/* Deal with multiline string start. */
		if ((longstring = strstr (cp, "\"\"\"")) != NULL)
		{
			/* Note: For our purposes, the line just ends at the first long
			 * string.
			 */
			*longstring = '\0';
			longstring += 3;
			longStringLiteral = TRUE;
			while ((longstring = strstr (longstring, "\"\"\"")) != NULL)
			{
				longstring += 3;
				longStringLiteral = !longStringLiteral;
			}
		}

		/* Deal with def and class keywords. */
		keyword = findDefinitionOrClass (cp);
		if (keyword)
		{
			boolean found = FALSE;
			boolean is_class = FALSE;
			if (!strncmp (keyword, "def", 3))
			{
				cp = skipSpace (keyword + 3);
				found = TRUE;
			}
			else if (!strncmp (keyword, "class", 5))
			{
				cp = skipSpace (keyword + 5);
				found = TRUE;
				is_class = TRUE;
			}

			if (found)
			{
				boolean is_parent_class;

				is_parent_class =
					constructParentString(nesting_levels, indent, parent);

				if (is_class)
					parseClass (cp, name, parent, is_parent_class);
				else
					parseFunction(cp, name, parent, is_parent_class);

				addNestingLevel(nesting_levels, indent, name, is_class);
			}
		}
	}
	/* Clean up all memory we allocated. */
	vStringDelete (parent);
	vStringDelete (name);
	vStringDelete (continuation);
	freeNestingLevels (nesting_levels);
}

extern parserDefinition *PythonParser (void)
{
	static const char *const extensions[] = { "py", "pyx", "pxd", "scons", NULL };
	parserDefinition *def = parserNew ("Python");
	def->kinds = PythonKinds;
	def->kindCount = KIND_COUNT (PythonKinds);
	def->extensions = extensions;
	def->parser = findPythonTags;
	return def;
}

/* vi:set tabstop=4 shiftwidth=4: */

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.