Skip to content

Commit d73233f

Browse files
committed
feat: code tokenization
1 parent 10761b3 commit d73233f

File tree

5 files changed

+182
-0
lines changed

5 files changed

+182
-0
lines changed
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import { CodeTokens } from './CodeTokens';
2+
import { PythonIdentifiers } from './identifiers/PythonIdentifiers';
3+
import { CommonLangIdentifiers } from './identifiers/CommonLangIdentifiers';
4+
5+
/**
 * Converts raw source code into a flat list of structural tokens
 * (function headers, parameters, loops, conditions, plain lines).
 *
 * The analysis is purely line based: each line is trimmed and classified by
 * the keyword it starts with and the block terminator it ends with.
 */
export class CodeTokenizer {
  private readonly pyIdentifiers = new PythonIdentifiers();

  /**
   * Tokenizes `code` according to the rules of `language`.
   *
   * @param code - Full source text; lines are separated by `\n`.
   * @param language - Language name. Only `'python'` is handled.
   * @returns One or more tokens per source line, in source order, or an empty
   *   array when the language is not supported.
   */
  public tokenizeCode(code: string, language: string): CodeTokens[] {
    switch (language) {
      case 'python':
        return this.tokenizePython(code, this.pyIdentifiers);
      default:
        return [];
    }
  }

  /**
   * Classifies every line of `code` using the keywords supplied by
   * `languageIdentifier`.
   *
   * A function header yields `FUNC` followed by one `FUNCPARAM` per parameter.
   * An `if` header yields `IF` followed by one `AND` / `OR` per boolean
   * operator found on that line. Lines that match no header (including blank
   * lines) yield `LINE`.
   */
  private tokenizePython(
    code: string,
    languageIdentifier: CommonLangIdentifiers,
  ): CodeTokens[] {
    const blockEnd = languageIdentifier.endLoopAndCondIdentifier();
    const tokenizedCode: CodeTokens[] = [];

    for (const line of code.split('\n')) {
      // trim() also strips a trailing '\r' left over from CRLF line endings.
      const trimmedLine = line.trim();

      if (
        CodeTokenizer.isBlockHeader(
          trimmedLine,
          languageIdentifier.functionIdentifier(),
          blockEnd,
        )
      ) {
        tokenizedCode.push(CodeTokens.FUNC);
        CodeTokenizer.pushTokensInTab(
          tokenizedCode,
          CodeTokens.FUNCPARAM,
          CodeTokenizer.numberOfFunctionParameters(trimmedLine),
        );
      } else if (
        CodeTokenizer.isBlockHeader(
          trimmedLine,
          languageIdentifier.forIdentifier(),
          blockEnd,
        )
      ) {
        tokenizedCode.push(CodeTokens.FOR);
      } else if (
        CodeTokenizer.isBlockHeader(
          trimmedLine,
          languageIdentifier.whileIdentifier(),
          blockEnd,
        )
      ) {
        tokenizedCode.push(CodeTokens.WHILE);
      } else if (
        CodeTokenizer.isBlockHeader(
          trimmedLine,
          languageIdentifier.ifIdentifier(),
          blockEnd,
        )
      ) {
        tokenizedCode.push(CodeTokens.IF);
        CodeTokenizer.pushTokensInTab(
          tokenizedCode,
          CodeTokens.AND,
          CodeTokenizer.numberTokenPresentInLine(
            languageIdentifier.andIdentifier(),
            trimmedLine,
          ),
        );
        CodeTokenizer.pushTokensInTab(
          tokenizedCode,
          CodeTokens.OR,
          CodeTokenizer.numberTokenPresentInLine(
            languageIdentifier.orIdentifier(),
            trimmedLine,
          ),
        );
      } else if (
        CodeTokenizer.isBlockHeader(
          trimmedLine,
          languageIdentifier.caseIdentifier(),
          blockEnd,
        )
      ) {
        tokenizedCode.push(CodeTokens.CASE);
      } else {
        tokenizedCode.push(CodeTokens.LINE);
      }
    }

    return tokenizedCode;
  }

  /**
   * Tells whether `line` opens a block introduced by `keyword` and closed by
   * `blockEnd` (e.g. `def ...:`).
   */
  private static isBlockHeader(
    line: string,
    keyword: string,
    blockEnd: string,
  ): boolean {
    return (
      CodeTokenizer.startsWithKeyword(line, keyword) && line.endsWith(blockEnd)
    );
  }

  /**
   * Tells whether `line` begins with `keyword` as a whole word.
   *
   * An empty keyword never matches: every string starts with `''`, so without
   * this guard an identifier set that leaves a keyword empty would claim every
   * line. When the keyword ends with a word character, the character that
   * follows it must not be a word character, so `iffy` is not read as `if`.
   */
  private static startsWithKeyword(line: string, keyword: string): boolean {
    if (keyword === '' || !line.startsWith(keyword)) {
      return false;
    }
    const wordChar = /[\p{L}\p{N}_]/u;
    if (!wordChar.test(keyword.charAt(keyword.length - 1))) {
      return true;
    }
    const following = line.charAt(keyword.length);
    return following === '' || !wordChar.test(following);
  }

  /**
   * Counts the parameters declared between the first `(` of `line` and its
   * matching `)`.
   *
   * Only commas at the top nesting level separate parameters, so defaults such
   * as `b=(1, 2)` count once. Empty segments are ignored, which makes `()`
   * yield 0 and tolerates a trailing comma. Commas inside string literals are
   * not recognised and are still treated as separators.
   *
   * @returns 0 when the line contains no `(`.
   */
  private static numberOfFunctionParameters(line: string): number {
    const open = line.indexOf('(');
    if (open === -1) {
      return 0;
    }

    let depth = 0;
    let completed = 0;
    let segmentHasContent = false;

    for (let i = open; i < line.length; i += 1) {
      const ch = line.charAt(i);
      if (ch === '(' || ch === '[' || ch === '{') {
        depth += 1;
        // A nested opening bracket is itself part of a parameter's text.
        if (depth > 1) {
          segmentHasContent = true;
        }
      } else if (ch === ')' || ch === ']' || ch === '}') {
        depth -= 1;
        if (depth === 0) {
          break;
        }
      } else if (ch === ',' && depth === 1) {
        if (segmentHasContent) {
          completed += 1;
        }
        segmentHasContent = false;
      } else if (ch.trim() !== '') {
        segmentHasContent = true;
      }
    }

    return segmentHasContent ? completed + 1 : completed;
  }

  /**
   * Counts the occurrences of `token` in `line`.
   *
   * Tokens made only of word characters (e.g. `and`) are matched as whole
   * words so they are not found inside identifiers such as `random`; any other
   * token (e.g. `&&`) is counted as a plain substring. An empty token counts
   * as zero occurrences.
   */
  private static numberTokenPresentInLine(token: string, line: string): number {
    if (token === '') {
      return 0;
    }
    if (/^\w+$/.test(token)) {
      // Safe to interpolate: the token contains no regex metacharacters.
      return (line.match(new RegExp(`\\b${token}\\b`, 'g')) ?? []).length;
    }
    return line.split(token).length - 1;
  }

  /**
   * Appends `token` to `tab` `occurrences` times.
   *
   * @returns The same array instance that was passed in.
   */
  private static pushTokensInTab(
    tab: CodeTokens[],
    token: CodeTokens,
    occurrences: number,
  ): CodeTokens[] {
    for (let i = 0; i < occurrences; i += 1) {
      tab.push(token);
    }
    return tab;
  }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
 * Structural tokens emitted when source code is tokenized.
 *
 * Each member is a string enum whose value is the serialized token name.
 */
export enum CodeTokens {
  // functions
  /** A function definition header. */
  FUNC = 'FUNCTION',
  /** One parameter of a function definition. */
  FUNCPARAM = 'FUNCPARAM',
  // loops
  /** A `for` loop header. */
  FOR = 'FOR',
  /** A `while` loop header. */
  WHILE = 'WHILE',
  // conditions
  /** An `if` condition header. */
  IF = 'IF',
  /** A `case` clause header. */
  CASE = 'CASE',
  /** One boolean OR operator inside a condition. */
  OR = 'OR',
  /** One boolean AND operator inside a condition. */
  AND = 'AND',
  // common line
  /** Any line that is none of the above. */
  LINE = 'LINE',
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/**
 * Default keywords and delimiters used to recognise code structure.
 *
 * The defaults follow brace-style syntax (`&&`, `||`, `{`). Subclasses
 * override the methods whose spelling differs in their language.
 */
export class CommonLangIdentifiers {
  /**
   * Keyword that introduces a function definition.
   *
   * @returns An empty string: the base class defines no function keyword.
   */
  functionIdentifier(): string {
    return '';
  }

  /** Keyword that introduces a `for` loop. */
  forIdentifier(): string {
    return 'for';
  }

  /** Keyword that introduces a `while` loop. */
  whileIdentifier(): string {
    return 'while';
  }

  /** Keyword that introduces an `if` condition. */
  ifIdentifier(): string {
    return 'if';
  }

  /** Keyword that introduces a `case` clause. */
  caseIdentifier(): string {
    return 'case';
  }

  /** Spelling of the boolean OR operator. */
  orIdentifier(): string {
    return '||';
  }

  /** Spelling of the boolean AND operator. */
  andIdentifier(): string {
    return '&&';
  }

  /** Text that ends the header line of a loop, condition or function. */
  endLoopAndCondIdentifier(): string {
    return '{';
  }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { CommonLangIdentifiers } from './CommonLangIdentifiers';
2+
3+
/**
 * Python spellings of the structural keywords and delimiters.
 *
 * `for`, `while` and `if` are inherited unchanged from
 * {@link CommonLangIdentifiers}.
 */
export class PythonIdentifiers extends CommonLangIdentifiers {
  /** Python functions are introduced by `def`. */
  functionIdentifier(): string {
    return 'def';
  }

  /**
   * Keyword of a `case` clause inside a `match` statement (Python 3.10+).
   *
   * This must not be an empty string: every string starts with `''`, so a
   * prefix check against an empty identifier succeeds on any line.
   */
  caseIdentifier(): string {
    return 'case';
  }

  /**
   * Python spells boolean OR as the keyword `or`, not `||`.
   *
   * Being a keyword, it can also occur inside identifiers (`for`, `word`);
   * consumers should match it on word boundaries.
   */
  orIdentifier(): string {
    return 'or';
  }

  /**
   * Python spells boolean AND as the keyword `and`, not `&&`.
   *
   * Being a keyword, it can also occur inside identifiers (`random`);
   * consumers should match it on word boundaries.
   */
  andIdentifier(): string {
    return 'and';
  }

  /** Python block headers (`def`, `for`, `while`, `if`, `case`) end with `:`. */
  endLoopAndCondIdentifier(): string {
    return ':';
  }
}

src/submissions/submissions.service.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { FindSubmissionDTO } from './dto/find-submission.dto';
1818
import { InsertSubmissionDTO } from './dto/insert-submission-dto';
1919
import { JobStatusDTO } from './dto/job-status.dto';
2020
import { Submission } from './submission.entity';
21+
import { CodeTokenizer } from '../code-quality/tokenizer/CodeTokenizer';
2122

2223
@Injectable()
2324
export class SubmissionsService {
@@ -31,6 +32,8 @@ export class SubmissionsService {
3132

3233
private hashService = new HashService();
3334

35+
private codeTokenizer = new CodeTokenizer();
36+
3437
async create(
3538
insertSubmissionDTO: InsertSubmissionDTO,
3639
lintScore: number,
@@ -57,6 +60,7 @@ export class SubmissionsService {
5760
insertSubmissionDTO.language,
5861
);
5962
// submission.self = submission;
63+
this.codeTokenizer.tokenizeCode(submission.code, submission.language);
6064

6165
return submission.save();
6266
}

0 commit comments

Comments
 (0)