LLVM 官方教程编译器实现笔记
主要研究使用 LLVM 进行开发的流程,参考官方教程的第 1~7 章。第 8~9 章主要介绍如何生成 object 文件并添加调试信息,生成的 object 文件需要链接到一个手写的 main.cpp 文件。
项目结构
.
├── CMakeLists.txt
├── include
│   └── KaleidoscopeJIT.h
├── src
│   └── main.cpp
└── tests
    └── fib.ks
代码清单
tests/fib.ks
# Recursive Fibonacci: returns 1 for arguments below 3.
def fib(n)
  if (n < 3) then
    1
  else
    fib(n - 1) + fib(n - 2);

# Top-level expression: evaluate fib for the argument 10.
fib(10);
CMakeLists.txt
cmake_minimum_required(VERSION 3.15)

# Build as ISO C++20. The variable that disables compiler extensions is
# CMAKE_CXX_EXTENSIONS (plural); the singular spelling is silently ignored.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

project(kaleidoscope VERSION 1.0.0 LANGUAGES CXX C)

# llvm

find_package(LLVM REQUIRED CONFIG)

message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")

# Make the LLVM headers visible and pass LLVM's compile definitions.
# LLVM_DEFINITIONS is a single space-separated string, so it is split into a
# proper CMake list before being handed to add_definitions().
include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})

# Translate LLVM component names into the library names to link against.
llvm_map_components_to_libnames(llvm_libs
  Analysis
  Core
  ExecutionEngine
  InstCombine
  Object
  OrcJIT
  RuntimeDyld
  ScalarOpts
  Support
  native
)

add_executable(main)
aux_source_directory("src" sources)
aux_source_directory("include" sources)
# Sources of an executable are an implementation detail, hence PRIVATE.
target_sources(main PRIVATE ${sources})

target_link_libraries(main PRIVATE ${llvm_libs})
KaleidoscopeJIT.h
1//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Contains a simple JIT definition for use in the kaleidoscope tutorials.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
14#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ExecutionEngine/JITSymbol.h"
18#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
19#include "llvm/ExecutionEngine/Orc/Core.h"
20#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
21#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
22#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
23#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
24#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
25#include "llvm/ExecutionEngine/SectionMemoryManager.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/LLVMContext.h"
28#include <memory>
29
30namespace llvm {
31namespace orc {
32
33class KaleidoscopeJIT {
34private:
35 std::unique_ptr<ExecutionSession> ES;
36
37 DataLayout DL;
38 MangleAndInterner Mangle;
39
40 RTDyldObjectLinkingLayer ObjectLayer;
41 IRCompileLayer CompileLayer;
42
43 JITDylib &MainJD;
44
45public:
46 KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
47 JITTargetMachineBuilder JTMB, DataLayout DL)
48 : ES(std::move(ES)), DL(std::move(DL)), Mangle(*this->ES, this->DL),
49 ObjectLayer(*this->ES,
50 []() { return std::make_unique<SectionMemoryManager>(); }),
51 CompileLayer(*this->ES, ObjectLayer,
52 std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
53 MainJD(this->ES->createBareJITDylib("<main>")) {
54 MainJD.addGenerator(
55 cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
56 DL.getGlobalPrefix())));
57 if (JTMB.getTargetTriple().isOSBinFormatCOFF()) {
58 ObjectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
59 ObjectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
60 }
61 }
62
63 ~KaleidoscopeJIT() {
64 if (auto Err = ES->endSession())
65 ES->reportError(std::move(Err));
66 }
67
68 static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
69 auto EPC = SelfExecutorProcessControl::Create();
70 if (!EPC)
71 return EPC.takeError();
72
73 auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
74
75 JITTargetMachineBuilder JTMB(
76 ES->getExecutorProcessControl().getTargetTriple());
77
78 auto DL = JTMB.getDefaultDataLayoutForTarget();
79 if (!DL)
80 return DL.takeError();
81
82 return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(JTMB),
83 std::move(*DL));
84 }
85
86 const DataLayout &getDataLayout() const { return DL; }
87
88 JITDylib &getMainJITDylib() { return MainJD; }
89
90 Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
91 if (!RT)
92 RT = MainJD.getDefaultResourceTracker();
93 return CompileLayer.add(RT, std::move(TSM));
94 }
95
96 Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
97 return ES->lookup({&MainJD}, Mangle(Name.str()));
98 }
99};
100
101} // end namespace orc
102} // end namespace llvm
103
104#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
main.cpp
1#include "llvm/ADT/STLExtras.h"
2#include "llvm/Analysis/BasicAliasAnalysis.h"
3#include "llvm/Analysis/Passes.h"
4#include "llvm/IR/DIBuilder.h"
5#include "llvm/IR/IRBuilder.h"
6#include "llvm/IR/LLVMContext.h"
7#include "llvm/IR/LegacyPassManager.h"
8#include "llvm/IR/Module.h"
9#include "llvm/IR/Verifier.h"
10#include "llvm/Support/Host.h"
11#include "llvm/Support/TargetSelect.h"
12#include "llvm/Transforms/Scalar.h"
13#include <cctype>
14#include <cstdio>
15#include <map>
16#include <string>
17#include <vector>
18#include "../include/KaleidoscopeJIT.h"
19
20using namespace llvm;
21using namespace llvm::orc;
22
23//===----------------------------------------------------------------------===//
24// Lexer
25//===----------------------------------------------------------------------===//
26
/// Token codes produced by the lexer. Characters the lexer does not recognise
/// are returned as their own value in [0-255]; every recognised token uses one
/// of the negative codes below.
enum Token {
  /// End of input.
  tok_eof = -1,

  /// Top-level commands.
  tok_def = -2,
  tok_extern = -3,

  /// Primary expressions.
  tok_identifier = -4,
  tok_number = -5,

  /// Control flow keywords.
  tok_if = -6,
  tok_then = -7,
  tok_else = -8,
  tok_for = -9,
  tok_in = -10,

  /// User-defined operator keywords.
  tok_binary = -11,
  tok_unary = -12,

  /// Variable definition keyword.
  tok_var = -13
};
54
55std::string getTokName(int Tok) {
56 switch (Tok) {
57 case tok_eof:
58 return "eof";
59 case tok_def:
60 return "def";
61 case tok_extern:
62 return "extern";
63 case tok_identifier:
64 return "identifier";
65 case tok_number:
66 return "number";
67 case tok_if:
68 return "if";
69 case tok_then:
70 return "then";
71 case tok_else:
72 return "else";
73 case tok_for:
74 return "for";
75 case tok_in:
76 return "in";
77 case tok_binary:
78 return "binary";
79 case tok_unary:
80 return "unary";
81 case tok_var:
82 return "var";
83 }
84 return std::string(1, (char)Tok);
85}
86
87namespace {
88class PrototypeAST;
89class ExprAST;
90}
91
92struct DebugInfo {
93 DICompileUnit *TheCU;
94 DIType *DblTy;
95 std::vector<DIScope *> LexicalBlocks;
96
97 void emitLocation(ExprAST *AST);
98 DIType *getDoubleTy();
99} KSDbgInfo;
100
/// A line/column position in the input.
struct SourceLocation {
  int Line;
  int Col;
};

/// Location of the token most recently started by gettok().
static SourceLocation CurLoc{};
/// Location of the character most recently read by advance(); starts at line 1,
/// column 0.
static SourceLocation LexLoc{1, 0};
107
108static int advance() {
109 int LastChar = getchar();
110
111 if (LastChar == '\n' || LastChar == '\r') {
112 LexLoc.Line++;
113 LexLoc.Col = 0;
114 } else
115 LexLoc.Col++;
116 return LastChar;
117}
118
119static std::string IdentifierStr; // Filled in if tok_identifier
120static double NumVal; // Filled in if tok_number
121
122/// gettok - Return the next token from standard input.
123static int gettok() {
124 static int LastChar = ' ';
125
126 // Skip any whitespace.
127 while (isspace(LastChar))
128 LastChar = advance();
129
130 CurLoc = LexLoc;
131
132 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
133 IdentifierStr = LastChar;
134 while (isalnum((LastChar = advance())))
135 IdentifierStr += LastChar;
136
137 if (IdentifierStr == "def")
138 return tok_def;
139 if (IdentifierStr == "extern")
140 return tok_extern;
141 if (IdentifierStr == "if")
142 return tok_if;
143 if (IdentifierStr == "then")
144 return tok_then;
145 if (IdentifierStr == "else")
146 return tok_else;
147 if (IdentifierStr == "for")
148 return tok_for;
149 if (IdentifierStr == "in")
150 return tok_in;
151 if (IdentifierStr == "binary")
152 return tok_binary;
153 if (IdentifierStr == "unary")
154 return tok_unary;
155 if (IdentifierStr == "var")
156 return tok_var;
157 return tok_identifier;
158 }
159
160 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
161 std::string NumStr;
162 do {
163 NumStr += LastChar;
164 LastChar = advance();
165 } while (isdigit(LastChar) || LastChar == '.');
166
167 NumVal = strtod(NumStr.c_str(), nullptr);
168 return tok_number;
169 }
170
171 if (LastChar == '#') {
172 // Comment until end of line.
173 do
174 LastChar = advance();
175 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
176
177 if (LastChar != EOF)
178 return gettok();
179 }
180
181 // Check for end of file. Don't eat the EOF.
182 if (LastChar == EOF)
183 return tok_eof;
184
185 // Otherwise, just return the character as its ascii value.
186 int ThisChar = LastChar;
187 LastChar = advance();
188 return ThisChar;
189}
190
191//===----------------------------------------------------------------------===//
192// Abstract Syntax Tree (aka Parse Tree)
193//===----------------------------------------------------------------------===//
194namespace {
195
196raw_ostream &indent(raw_ostream &O, int size) {
197 return O << std::string(size, ' ');
198}
199
200/// ExprAST - Base class for all expression nodes.
201class ExprAST {
202 SourceLocation Loc;
203
204public:
205 ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
206 virtual ~ExprAST() {}
207 virtual Value *codegen() = 0;
208 int getLine() const { return Loc.Line; }
209 int getCol() const { return Loc.Col; }
210 virtual raw_ostream &dump(raw_ostream &out, int ind) {
211 return out << ':' << getLine() << ':' << getCol() << '\n';
212 }
213};
214
215/// NumberExprAST - Expression class for numeric literals like "1.0".
216class NumberExprAST : public ExprAST {
217 double Val;
218
219public:
220 NumberExprAST(double Val) : Val(Val) {}
221 raw_ostream &dump(raw_ostream &out, int ind) override {
222 return ExprAST::dump(out << Val, ind);
223 }
224 Value *codegen() override;
225};
226
227/// VariableExprAST - Expression class for referencing a variable, like "a".
228class VariableExprAST : public ExprAST {
229 std::string Name;
230
231public:
232 VariableExprAST(SourceLocation Loc, const std::string &Name)
233 : ExprAST(Loc), Name(Name) {}
234 const std::string &getName() const { return Name; }
235 Value *codegen() override;
236 raw_ostream &dump(raw_ostream &out, int ind) override {
237 return ExprAST::dump(out << Name, ind);
238 }
239};
240
241/// UnaryExprAST - Expression class for a unary operator.
242class UnaryExprAST : public ExprAST {
243 char Opcode;
244 std::unique_ptr<ExprAST> Operand;
245
246public:
247 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
248 : Opcode(Opcode), Operand(std::move(Operand)) {}
249 Value *codegen() override;
250 raw_ostream &dump(raw_ostream &out, int ind) override {
251 ExprAST::dump(out << "unary" << Opcode, ind);
252 Operand->dump(out, ind + 1);
253 return out;
254 }
255};
256
257/// BinaryExprAST - Expression class for a binary operator.
258class BinaryExprAST : public ExprAST {
259 char Op;
260 std::unique_ptr<ExprAST> LHS, RHS;
261
262public:
263 BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS,
264 std::unique_ptr<ExprAST> RHS)
265 : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
266 Value *codegen() override;
267 raw_ostream &dump(raw_ostream &out, int ind) override {
268 ExprAST::dump(out << "binary" << Op, ind);
269 LHS->dump(indent(out, ind) << "LHS:", ind + 1);
270 RHS->dump(indent(out, ind) << "RHS:", ind + 1);
271 return out;
272 }
273};
274
275/// CallExprAST - Expression class for function calls.
276class CallExprAST : public ExprAST {
277 std::string Callee;
278 std::vector<std::unique_ptr<ExprAST>> Args;
279
280public:
281 CallExprAST(SourceLocation Loc, const std::string &Callee,
282 std::vector<std::unique_ptr<ExprAST>> Args)
283 : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {}
284 Value *codegen() override;
285 raw_ostream &dump(raw_ostream &out, int ind) override {
286 ExprAST::dump(out << "call " << Callee, ind);
287 for (const auto &Arg : Args)
288 Arg->dump(indent(out, ind + 1), ind + 1);
289 return out;
290 }
291};
292
293/// IfExprAST - Expression class for if/then/else.
294class IfExprAST : public ExprAST {
295 std::unique_ptr<ExprAST> Cond, Then, Else;
296
297public:
298 IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond,
299 std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else)
300 : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)),
301 Else(std::move(Else)) {}
302 Value *codegen() override;
303 raw_ostream &dump(raw_ostream &out, int ind) override {
304 ExprAST::dump(out << "if", ind);
305 Cond->dump(indent(out, ind) << "Cond:", ind + 1);
306 Then->dump(indent(out, ind) << "Then:", ind + 1);
307 Else->dump(indent(out, ind) << "Else:", ind + 1);
308 return out;
309 }
310};
311
312/// ForExprAST - Expression class for for/in.
313class ForExprAST : public ExprAST {
314 std::string VarName;
315 std::unique_ptr<ExprAST> Start, End, Step, Body;
316
317public:
318 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
319 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
320 std::unique_ptr<ExprAST> Body)
321 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
322 Step(std::move(Step)), Body(std::move(Body)) {}
323 Value *codegen() override;
324 raw_ostream &dump(raw_ostream &out, int ind) override {
325 ExprAST::dump(out << "for", ind);
326 Start->dump(indent(out, ind) << "Cond:", ind + 1);
327 End->dump(indent(out, ind) << "End:", ind + 1);
328 Step->dump(indent(out, ind) << "Step:", ind + 1);
329 Body->dump(indent(out, ind) << "Body:", ind + 1);
330 return out;
331 }
332};
333
334/// VarExprAST - Expression class for var/in
335class VarExprAST : public ExprAST {
336 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
337 std::unique_ptr<ExprAST> Body;
338
339public:
340 VarExprAST(
341 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
342 std::unique_ptr<ExprAST> Body)
343 : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
344 Value *codegen() override;
345 raw_ostream &dump(raw_ostream &out, int ind) override {
346 ExprAST::dump(out << "var", ind);
347 for (const auto &NamedVar : VarNames)
348 NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1);
349 Body->dump(indent(out, ind) << "Body:", ind + 1);
350 return out;
351 }
352};
353
354/// PrototypeAST - This class represents the "prototype" for a function,
355/// which captures its name, and its argument names (thus implicitly the number
356/// of arguments the function takes), as well as if it is an operator.
357class PrototypeAST {
358 std::string Name;
359 std::vector<std::string> Args;
360 bool IsOperator;
361 unsigned Precedence; // Precedence if a binary op.
362 int Line;
363
364public:
365 PrototypeAST(SourceLocation Loc, const std::string &Name,
366 std::vector<std::string> Args, bool IsOperator = false,
367 unsigned Prec = 0)
368 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
369 Precedence(Prec), Line(Loc.Line) {}
370 Function *codegen();
371 const std::string &getName() const { return Name; }
372
373 bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
374 bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
375
376 char getOperatorName() const {
377 assert(isUnaryOp() || isBinaryOp());
378 return Name[Name.size() - 1];
379 }
380
381 unsigned getBinaryPrecedence() const { return Precedence; }
382 int getLine() const { return Line; }
383};
384
385/// FunctionAST - This class represents a function definition itself.
386class FunctionAST {
387 std::unique_ptr<PrototypeAST> Proto;
388 std::unique_ptr<ExprAST> Body;
389
390public:
391 FunctionAST(std::unique_ptr<PrototypeAST> Proto,
392 std::unique_ptr<ExprAST> Body)
393 : Proto(std::move(Proto)), Body(std::move(Body)) {}
394 Function *codegen();
395 raw_ostream &dump(raw_ostream &out, int ind) {
396 indent(out, ind) << "FunctionAST\n";
397 ++ind;
398 indent(out, ind) << "Body:";
399 return Body ? Body->dump(out, ind) : out << "null\n";
400 }
401};
402} // end anonymous namespace
403
404//===----------------------------------------------------------------------===//
405// Parser
406//===----------------------------------------------------------------------===//
407
408/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
409/// token the parser is looking at. getNextToken reads another token from the
410/// lexer and updates CurTok with its results.
411static int CurTok;
412static int getNextToken() { return CurTok = gettok(); }
413
414/// BinopPrecedence - This holds the precedence for each binary operator that is
415/// defined.
416static std::map<char, int> BinopPrecedence;
417
418/// GetTokPrecedence - Get the precedence of the pending binary operator token.
419static int GetTokPrecedence() {
420 if (!isascii(CurTok))
421 return -1;
422
423 // Make sure it's a declared binop.
424 int TokPrec = BinopPrecedence[CurTok];
425 if (TokPrec <= 0)
426 return -1;
427 return TokPrec;
428}
429
430/// LogError* - These are little helper functions for error handling.
431std::unique_ptr<ExprAST> LogError(const char *Str) {
432 fprintf(stderr, "Error: %s\n", Str);
433 return nullptr;
434}
435
436std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
437 LogError(Str);
438 return nullptr;
439}
440
441static std::unique_ptr<ExprAST> ParseExpression();
442
443/// numberexpr ::= number
444static std::unique_ptr<ExprAST> ParseNumberExpr() {
445 auto Result = std::make_unique<NumberExprAST>(NumVal);
446 getNextToken(); // consume the number
447 return std::move(Result);
448}
449
450/// parenexpr ::= '(' expression ')'
451static std::unique_ptr<ExprAST> ParseParenExpr() {
452 getNextToken(); // eat (.
453 auto V = ParseExpression();
454 if (!V)
455 return nullptr;
456
457 if (CurTok != ')')
458 return LogError("expected ')'");
459 getNextToken(); // eat ).
460 return V;
461}
462
463/// identifierexpr
464/// ::= identifier
465/// ::= identifier '(' expression* ')'
466static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
467 std::string IdName = IdentifierStr;
468
469 SourceLocation LitLoc = CurLoc;
470
471 getNextToken(); // eat identifier.
472
473 if (CurTok != '(') // Simple variable ref.
474 return std::make_unique<VariableExprAST>(LitLoc, IdName);
475
476 // Call.
477 getNextToken(); // eat (
478 std::vector<std::unique_ptr<ExprAST>> Args;
479 if (CurTok != ')') {
480 while (true) {
481 if (auto Arg = ParseExpression())
482 Args.push_back(std::move(Arg));
483 else
484 return nullptr;
485
486 if (CurTok == ')')
487 break;
488
489 if (CurTok != ',')
490 return LogError("Expected ')' or ',' in argument list");
491 getNextToken();
492 }
493 }
494
495 // Eat the ')'.
496 getNextToken();
497
498 return std::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args));
499}
500
501/// ifexpr ::= 'if' expression 'then' expression 'else' expression
502static std::unique_ptr<ExprAST> ParseIfExpr() {
503 SourceLocation IfLoc = CurLoc;
504
505 getNextToken(); // eat the if.
506
507 // condition.
508 auto Cond = ParseExpression();
509 if (!Cond)
510 return nullptr;
511
512 if (CurTok != tok_then)
513 return LogError("expected then");
514 getNextToken(); // eat the then
515
516 auto Then = ParseExpression();
517 if (!Then)
518 return nullptr;
519
520 if (CurTok != tok_else)
521 return LogError("expected else");
522
523 getNextToken();
524
525 auto Else = ParseExpression();
526 if (!Else)
527 return nullptr;
528
529 return std::make_unique<IfExprAST>(IfLoc, std::move(Cond), std::move(Then),
530 std::move(Else));
531}
532
533/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
534static std::unique_ptr<ExprAST> ParseForExpr() {
535 getNextToken(); // eat the for.
536
537 if (CurTok != tok_identifier)
538 return LogError("expected identifier after for");
539
540 std::string IdName = IdentifierStr;
541 getNextToken(); // eat identifier.
542
543 if (CurTok != '=')
544 return LogError("expected '=' after for");
545 getNextToken(); // eat '='.
546
547 auto Start = ParseExpression();
548 if (!Start)
549 return nullptr;
550 if (CurTok != ',')
551 return LogError("expected ',' after for start value");
552 getNextToken();
553
554 auto End = ParseExpression();
555 if (!End)
556 return nullptr;
557
558 // The step value is optional.
559 std::unique_ptr<ExprAST> Step;
560 if (CurTok == ',') {
561 getNextToken();
562 Step = ParseExpression();
563 if (!Step)
564 return nullptr;
565 }
566
567 if (CurTok != tok_in)
568 return LogError("expected 'in' after for");
569 getNextToken(); // eat 'in'.
570
571 auto Body = ParseExpression();
572 if (!Body)
573 return nullptr;
574
575 return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
576 std::move(Step), std::move(Body));
577}
578
579/// varexpr ::= 'var' identifier ('=' expression)?
580// (',' identifier ('=' expression)?)* 'in' expression
581static std::unique_ptr<ExprAST> ParseVarExpr() {
582 getNextToken(); // eat the var.
583
584 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
585
586 // At least one variable name is required.
587 if (CurTok != tok_identifier)
588 return LogError("expected identifier after var");
589
590 while (true) {
591 std::string Name = IdentifierStr;
592 getNextToken(); // eat identifier.
593
594 // Read the optional initializer.
595 std::unique_ptr<ExprAST> Init = nullptr;
596 if (CurTok == '=') {
597 getNextToken(); // eat the '='.
598
599 Init = ParseExpression();
600 if (!Init)
601 return nullptr;
602 }
603
604 VarNames.push_back(std::make_pair(Name, std::move(Init)));
605
606 // End of var list, exit loop.
607 if (CurTok != ',')
608 break;
609 getNextToken(); // eat the ','.
610
611 if (CurTok != tok_identifier)
612 return LogError("expected identifier list after var");
613 }
614
615 // At this point, we have to have 'in'.
616 if (CurTok != tok_in)
617 return LogError("expected 'in' keyword after 'var'");
618 getNextToken(); // eat 'in'.
619
620 auto Body = ParseExpression();
621 if (!Body)
622 return nullptr;
623
624 return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
625}
626
627/// primary
628/// ::= identifierexpr
629/// ::= numberexpr
630/// ::= parenexpr
631/// ::= ifexpr
632/// ::= forexpr
633/// ::= varexpr
634static std::unique_ptr<ExprAST> ParsePrimary() {
635 switch (CurTok) {
636 default:
637 return LogError("unknown token when expecting an expression");
638 case tok_identifier:
639 return ParseIdentifierExpr();
640 case tok_number:
641 return ParseNumberExpr();
642 case '(':
643 return ParseParenExpr();
644 case tok_if:
645 return ParseIfExpr();
646 case tok_for:
647 return ParseForExpr();
648 case tok_var:
649 return ParseVarExpr();
650 }
651}
652
653/// unary
654/// ::= primary
655/// ::= '!' unary
656static std::unique_ptr<ExprAST> ParseUnary() {
657 // If the current token is not an operator, it must be a primary expr.
658 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
659 return ParsePrimary();
660
661 // If this is a unary operator, read it.
662 int Opc = CurTok;
663 getNextToken();
664 if (auto Operand = ParseUnary())
665 return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
666 return nullptr;
667}
668
669/// binoprhs
670/// ::= ('+' unary)*
671static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
672 std::unique_ptr<ExprAST> LHS) {
673 // If this is a binop, find its precedence.
674 while (true) {
675 int TokPrec = GetTokPrecedence();
676
677 // If this is a binop that binds at least as tightly as the current binop,
678 // consume it, otherwise we are done.
679 if (TokPrec < ExprPrec)
680 return LHS;
681
682 // Okay, we know this is a binop.
683 int BinOp = CurTok;
684 SourceLocation BinLoc = CurLoc;
685 getNextToken(); // eat binop
686
687 // Parse the unary expression after the binary operator.
688 auto RHS = ParseUnary();
689 if (!RHS)
690 return nullptr;
691
692 // If BinOp binds less tightly with RHS than the operator after RHS, let
693 // the pending operator take RHS as its LHS.
694 int NextPrec = GetTokPrecedence();
695 if (TokPrec < NextPrec) {
696 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
697 if (!RHS)
698 return nullptr;
699 }
700
701 // Merge LHS/RHS.
702 LHS = std::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
703 std::move(RHS));
704 }
705}
706
707/// expression
708/// ::= unary binoprhs
709///
710static std::unique_ptr<ExprAST> ParseExpression() {
711 auto LHS = ParseUnary();
712 if (!LHS)
713 return nullptr;
714
715 return ParseBinOpRHS(0, std::move(LHS));
716}
717
718/// prototype
719/// ::= id '(' id* ')'
720/// ::= binary LETTER number? (id, id)
721/// ::= unary LETTER (id)
722static std::unique_ptr<PrototypeAST> ParsePrototype() {
723 std::string FnName;
724
725 SourceLocation FnLoc = CurLoc;
726
727 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
728 unsigned BinaryPrecedence = 30;
729
730 switch (CurTok) {
731 default:
732 return LogErrorP("Expected function name in prototype");
733 case tok_identifier:
734 FnName = IdentifierStr;
735 Kind = 0;
736 getNextToken();
737 break;
738 case tok_unary:
739 getNextToken();
740 if (!isascii(CurTok))
741 return LogErrorP("Expected unary operator");
742 FnName = "unary";
743 FnName += (char)CurTok;
744 Kind = 1;
745 getNextToken();
746 break;
747 case tok_binary:
748 getNextToken();
749 if (!isascii(CurTok))
750 return LogErrorP("Expected binary operator");
751 FnName = "binary";
752 FnName += (char)CurTok;
753 Kind = 2;
754 getNextToken();
755
756 // Read the precedence if present.
757 if (CurTok == tok_number) {
758 if (NumVal < 1 || NumVal > 100)
759 return LogErrorP("Invalid precedence: must be 1..100");
760 BinaryPrecedence = (unsigned)NumVal;
761 getNextToken();
762 }
763 break;
764 }
765
766 if (CurTok != '(')
767 return LogErrorP("Expected '(' in prototype");
768
769 std::vector<std::string> ArgNames;
770 while (getNextToken() == tok_identifier)
771 ArgNames.push_back(IdentifierStr);
772 if (CurTok != ')')
773 return LogErrorP("Expected ')' in prototype");
774
775 // success.
776 getNextToken(); // eat ')'.
777
778 // Verify right number of names for operator.
779 if (Kind && ArgNames.size() != Kind)
780 return LogErrorP("Invalid number of operands for operator");
781
782 return std::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0,
783 BinaryPrecedence);
784}
785
786/// definition ::= 'def' prototype expression
787static std::unique_ptr<FunctionAST> ParseDefinition() {
788 getNextToken(); // eat def.
789 auto Proto = ParsePrototype();
790 if (!Proto)
791 return nullptr;
792
793 if (auto E = ParseExpression())
794 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
795 return nullptr;
796}
797
798/// toplevelexpr ::= expression
799static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
800 SourceLocation FnLoc = CurLoc;
801 if (auto E = ParseExpression()) {
802 // Make an anonymous proto.
803 auto Proto = std::make_unique<PrototypeAST>(FnLoc, "__anon_expr",
804 std::vector<std::string>());
805 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
806 }
807 return nullptr;
808}
809
810/// external ::= 'extern' prototype
811static std::unique_ptr<PrototypeAST> ParseExtern() {
812 getNextToken(); // eat extern.
813 return ParsePrototype();
814}
815
816//===----------------------------------------------------------------------===//
817// Code Generation Globals
818//===----------------------------------------------------------------------===//
819
820static std::unique_ptr<LLVMContext> TheContext;
821static std::unique_ptr<Module> TheModule;
822static std::unique_ptr<IRBuilder<>> Builder;
823static ExitOnError ExitOnErr;
824
825static std::map<std::string, AllocaInst *> NamedValues;
826static std::unique_ptr<KaleidoscopeJIT> TheJIT;
827static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
828
829//===----------------------------------------------------------------------===//
830// Debug Info Support
831//===----------------------------------------------------------------------===//
832
833static std::unique_ptr<DIBuilder> DBuilder;
834
835DIType *DebugInfo::getDoubleTy() {
836 if (DblTy)
837 return DblTy;
838
839 DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
840 return DblTy;
841}
842
843void DebugInfo::emitLocation(ExprAST *AST) {
844 if (!AST)
845 return Builder->SetCurrentDebugLocation(DebugLoc());
846 DIScope *Scope;
847 if (LexicalBlocks.empty())
848 Scope = TheCU;
849 else
850 Scope = LexicalBlocks.back();
851 Builder->SetCurrentDebugLocation(DILocation::get(
852 Scope->getContext(), AST->getLine(), AST->getCol(), Scope));
853}
854
855static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) {
856 SmallVector<Metadata *, 8> EltTys;
857 DIType *DblTy = KSDbgInfo.getDoubleTy();
858
859 // Add the result type.
860 EltTys.push_back(DblTy);
861
862 for (unsigned i = 0, e = NumArgs; i != e; ++i)
863 EltTys.push_back(DblTy);
864
865 return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys));
866}
867
868//===----------------------------------------------------------------------===//
869// Code Generation
870//===----------------------------------------------------------------------===//
871
872Value *LogErrorV(const char *Str) {
873 LogError(Str);
874 return nullptr;
875}
876
877Function *getFunction(std::string Name) {
878 // First, see if the function has already been added to the current module.
879 if (auto *F = TheModule->getFunction(Name))
880 return F;
881
882 // If not, check whether we can codegen the declaration from some existing
883 // prototype.
884 auto FI = FunctionProtos.find(Name);
885 if (FI != FunctionProtos.end())
886 return FI->second->codegen();
887
888 // If no existing prototype exists, return null.
889 return nullptr;
890}
891
892/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
893/// the function. This is used for mutable variables etc.
894static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
895 StringRef VarName) {
896 IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
897 TheFunction->getEntryBlock().begin());
898 return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName);
899}
900
901Value *NumberExprAST::codegen() {
902 KSDbgInfo.emitLocation(this);
903 return ConstantFP::get(*TheContext, APFloat(Val));
904}
905
906Value *VariableExprAST::codegen() {
907 // Look this variable up in the function.
908 Value *V = NamedValues[Name];
909 if (!V)
910 return LogErrorV("Unknown variable name");
911
912 KSDbgInfo.emitLocation(this);
913 // Load the value.
914 return Builder->CreateLoad(Type::getDoubleTy(*TheContext), V, Name.c_str());
915}
916
917Value *UnaryExprAST::codegen() {
918 Value *OperandV = Operand->codegen();
919 if (!OperandV)
920 return nullptr;
921
922 Function *F = getFunction(std::string("unary") + Opcode);
923 if (!F)
924 return LogErrorV("Unknown unary operator");
925
926 KSDbgInfo.emitLocation(this);
927 return Builder->CreateCall(F, OperandV, "unop");
928}
929
930Value *BinaryExprAST::codegen() {
931 KSDbgInfo.emitLocation(this);
932
933 // Special case '=' because we don't want to emit the LHS as an expression.
934 if (Op == '=') {
935 // Assignment requires the LHS to be an identifier.
936 // This assume we're building without RTTI because LLVM builds that way by
937 // default. If you build LLVM with RTTI this can be changed to a
938 // dynamic_cast for automatic error checking.
939 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
940 if (!LHSE)
941 return LogErrorV("destination of '=' must be a variable");
942 // Codegen the RHS.
943 Value *Val = RHS->codegen();
944 if (!Val)
945 return nullptr;
946
947 // Look up the name.
948 Value *Variable = NamedValues[LHSE->getName()];
949 if (!Variable)
950 return LogErrorV("Unknown variable name");
951
952 Builder->CreateStore(Val, Variable);
953 return Val;
954 }
955
956 Value *L = LHS->codegen();
957 Value *R = RHS->codegen();
958 if (!L || !R)
959 return nullptr;
960
961 switch (Op) {
962 case '+':
963 return Builder->CreateFAdd(L, R, "addtmp");
964 case '-':
965 return Builder->CreateFSub(L, R, "subtmp");
966 case '*':
967 return Builder->CreateFMul(L, R, "multmp");
968 case '<':
969 L = Builder->CreateFCmpULT(L, R, "cmptmp");
970 // Convert bool 0/1 to double 0.0 or 1.0
971 return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
972 default:
973 break;
974 }
975
976 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
977 // a call to it.
978 Function *F = getFunction(std::string("binary") + Op);
979 assert(F && "binary operator not found!");
980
981 Value *Ops[] = {L, R};
982 return Builder->CreateCall(F, Ops, "binop");
983}
984
985Value *CallExprAST::codegen() {
986 KSDbgInfo.emitLocation(this);
987
988 // Look up the name in the global module table.
989 Function *CalleeF = getFunction(Callee);
990 if (!CalleeF)
991 return LogErrorV("Unknown function referenced");
992
993 // If argument mismatch error.
994 if (CalleeF->arg_size() != Args.size())
995 return LogErrorV("Incorrect # arguments passed");
996
997 std::vector<Value *> ArgsV;
998 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
999 ArgsV.push_back(Args[i]->codegen());
1000 if (!ArgsV.back())
1001 return nullptr;
1002 }
1003
1004 return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
1005}
1006
1007Value *IfExprAST::codegen() {
1008 KSDbgInfo.emitLocation(this);
1009
1010 Value *CondV = Cond->codegen();
1011 if (!CondV)
1012 return nullptr;
1013
1014 // Convert condition to a bool by comparing non-equal to 0.0.
1015 CondV = Builder->CreateFCmpONE(
1016 CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
1017
1018 Function *TheFunction = Builder->GetInsertBlock()->getParent();
1019
1020 // Create blocks for the then and else cases. Insert the 'then' block at the
1021 // end of the function.
1022 BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
1023 BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
1024 BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
1025
1026 Builder->CreateCondBr(CondV, ThenBB, ElseBB);
1027
1028 // Emit then value.
1029 Builder->SetInsertPoint(ThenBB);
1030
1031 Value *ThenV = Then->codegen();
1032 if (!ThenV)
1033 return nullptr;
1034
1035 Builder->CreateBr(MergeBB);
1036 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
1037 ThenBB = Builder->GetInsertBlock();
1038
1039 // Emit else block.
1040 TheFunction->getBasicBlockList().push_back(ElseBB);
1041 Builder->SetInsertPoint(ElseBB);
1042
1043 Value *ElseV = Else->codegen();
1044 if (!ElseV)
1045 return nullptr;
1046
1047 Builder->CreateBr(MergeBB);
1048 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
1049 ElseBB = Builder->GetInsertBlock();
1050
1051 // Emit merge block.
1052 TheFunction->getBasicBlockList().push_back(MergeBB);
1053 Builder->SetInsertPoint(MergeBB);
1054 PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
1055
1056 PN->addIncoming(ThenV, ThenBB);
1057 PN->addIncoming(ElseV, ElseBB);
1058 return PN;
1059}
1060
1061// Output for-loop as:
1062// var = alloca double
1063// ...
1064// start = startexpr
1065// store start -> var
1066// goto loop
1067// loop:
1068// ...
1069// bodyexpr
1070// ...
1071// loopend:
1072// step = stepexpr
1073// endcond = endexpr
1074//
1075// curvar = load var
1076// nextvar = curvar + step
1077// store nextvar -> var
1078// br endcond, loop, endloop
1079// outloop:
1080Value *ForExprAST::codegen() {
1081 Function *TheFunction = Builder->GetInsertBlock()->getParent();
1082
1083 // Create an alloca for the variable in the entry block.
1084 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1085
1086 KSDbgInfo.emitLocation(this);
1087
1088 // Emit the start code first, without 'variable' in scope.
1089 Value *StartVal = Start->codegen();
1090 if (!StartVal)
1091 return nullptr;
1092
1093 // Store the value into the alloca.
1094 Builder->CreateStore(StartVal, Alloca);
1095
1096 // Make the new basic block for the loop header, inserting after current
1097 // block.
1098 BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
1099
1100 // Insert an explicit fall through from the current block to the LoopBB.
1101 Builder->CreateBr(LoopBB);
1102
1103 // Start insertion in LoopBB.
1104 Builder->SetInsertPoint(LoopBB);
1105
1106 // Within the loop, the variable is defined equal to the PHI node. If it
1107 // shadows an existing variable, we have to restore it, so save it now.
1108 AllocaInst *OldVal = NamedValues[VarName];
1109 NamedValues[VarName] = Alloca;
1110
1111 // Emit the body of the loop. This, like any other expr, can change the
1112 // current BB. Note that we ignore the value computed by the body, but don't
1113 // allow an error.
1114 if (!Body->codegen())
1115 return nullptr;
1116
1117 // Emit the step value.
1118 Value *StepVal = nullptr;
1119 if (Step) {
1120 StepVal = Step->codegen();
1121 if (!StepVal)
1122 return nullptr;
1123 } else {
1124 // If not specified, use 1.0.
1125 StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
1126 }
1127
1128 // Compute the end condition.
1129 Value *EndCond = End->codegen();
1130 if (!EndCond)
1131 return nullptr;
1132
1133 // Reload, increment, and restore the alloca. This handles the case where
1134 // the body of the loop mutates the variable.
1135 Value *CurVar = Builder->CreateLoad(Type::getDoubleTy(*TheContext), Alloca,
1136 VarName.c_str());
1137 Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
1138 Builder->CreateStore(NextVar, Alloca);
1139
1140 // Convert condition to a bool by comparing non-equal to 0.0.
1141 EndCond = Builder->CreateFCmpONE(
1142 EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
1143
1144 // Create the "after loop" block and insert it.
1145 BasicBlock *AfterBB =
1146 BasicBlock::Create(*TheContext, "afterloop", TheFunction);
1147
1148 // Insert the conditional branch into the end of LoopEndBB.
1149 Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
1150
1151 // Any new code will be inserted in AfterBB.
1152 Builder->SetInsertPoint(AfterBB);
1153
1154 // Restore the unshadowed variable.
1155 if (OldVal)
1156 NamedValues[VarName] = OldVal;
1157 else
1158 NamedValues.erase(VarName);
1159
1160 // for expr always returns 0.0.
1161 return Constant::getNullValue(Type::getDoubleTy(*TheContext));
1162}
1163
1164Value *VarExprAST::codegen() {
1165 std::vector<AllocaInst *> OldBindings;
1166
1167 Function *TheFunction = Builder->GetInsertBlock()->getParent();
1168
1169 // Register all variables and emit their initializer.
1170 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
1171 const std::string &VarName = VarNames[i].first;
1172 ExprAST *Init = VarNames[i].second.get();
1173
1174 // Emit the initializer before adding the variable to scope, this prevents
1175 // the initializer from referencing the variable itself, and permits stuff
1176 // like this:
1177 // var a = 1 in
1178 // var a = a in ... # refers to outer 'a'.
1179 Value *InitVal;
1180 if (Init) {
1181 InitVal = Init->codegen();
1182 if (!InitVal)
1183 return nullptr;
1184 } else { // If not specified, use 0.0.
1185 InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
1186 }
1187
1188 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1189 Builder->CreateStore(InitVal, Alloca);
1190
1191 // Remember the old variable binding so that we can restore the binding when
1192 // we unrecurse.
1193 OldBindings.push_back(NamedValues[VarName]);
1194
1195 // Remember this binding.
1196 NamedValues[VarName] = Alloca;
1197 }
1198
1199 KSDbgInfo.emitLocation(this);
1200
1201 // Codegen the body, now that all vars are in scope.
1202 Value *BodyVal = Body->codegen();
1203 if (!BodyVal)
1204 return nullptr;
1205
1206 // Pop all our variables from scope.
1207 for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1208 NamedValues[VarNames[i].first] = OldBindings[i];
1209
1210 // Return the body computation.
1211 return BodyVal;
1212}
1213
1214Function *PrototypeAST::codegen() {
1215 // Make the function type: double(double,double) etc.
1216 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext));
1217 FunctionType *FT =
1218 FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false);
1219
1220 Function *F =
1221 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1222
1223 // Set names for all arguments.
1224 unsigned Idx = 0;
1225 for (auto &Arg : F->args())
1226 Arg.setName(Args[Idx++]);
1227
1228 return F;
1229}
1230
1231Function *FunctionAST::codegen() {
1232 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1233 // reference to it for use below.
1234 auto &P = *Proto;
1235 FunctionProtos[Proto->getName()] = std::move(Proto);
1236 Function *TheFunction = getFunction(P.getName());
1237 if (!TheFunction)
1238 return nullptr;
1239
1240 // If this is an operator, install it.
1241 if (P.isBinaryOp())
1242 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1243
1244 // Create a new basic block to start insertion into.
1245 BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
1246 Builder->SetInsertPoint(BB);
1247
1248 // Create a subprogram DIE for this function.
1249 DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(),
1250 KSDbgInfo.TheCU->getDirectory());
1251 DIScope *FContext = Unit;
1252 unsigned LineNo = P.getLine();
1253 unsigned ScopeLine = LineNo;
1254 DISubprogram *SP = DBuilder->createFunction(
1255 FContext, P.getName(), StringRef(), Unit, LineNo,
1256 CreateFunctionType(TheFunction->arg_size(), Unit), ScopeLine,
1257 DINode::FlagPrototyped, DISubprogram::SPFlagDefinition);
1258 TheFunction->setSubprogram(SP);
1259
1260 // Push the current scope.
1261 KSDbgInfo.LexicalBlocks.push_back(SP);
1262
1263 // Unset the location for the prologue emission (leading instructions with no
1264 // location in a function are considered part of the prologue and the debugger
1265 // will run past them when breaking on a function)
1266 KSDbgInfo.emitLocation(nullptr);
1267
1268 // Record the function arguments in the NamedValues map.
1269 NamedValues.clear();
1270 unsigned ArgIdx = 0;
1271 for (auto &Arg : TheFunction->args()) {
1272 // Create an alloca for this variable.
1273 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1274
1275 // Create a debug descriptor for the variable.
1276 DILocalVariable *D = DBuilder->createParameterVariable(
1277 SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
1278 true);
1279
1280 DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
1281 DILocation::get(SP->getContext(), LineNo, 0, SP),
1282 Builder->GetInsertBlock());
1283
1284 // Store the initial value into the alloca.
1285 Builder->CreateStore(&Arg, Alloca);
1286
1287 // Add arguments to variable symbol table.
1288 NamedValues[std::string(Arg.getName())] = Alloca;
1289 }
1290
1291 KSDbgInfo.emitLocation(Body.get());
1292
1293 if (Value *RetVal = Body->codegen()) {
1294 // Finish off the function.
1295 Builder->CreateRet(RetVal);
1296
1297 // Pop off the lexical block for the function.
1298 KSDbgInfo.LexicalBlocks.pop_back();
1299
1300 // Validate the generated code, checking for consistency.
1301 verifyFunction(*TheFunction);
1302
1303 return TheFunction;
1304 }
1305
1306 // Error reading body, remove function.
1307 TheFunction->eraseFromParent();
1308
1309 if (P.isBinaryOp())
1310 BinopPrecedence.erase(Proto->getOperatorName());
1311
1312 // Pop off the lexical block for the function since we added it
1313 // unconditionally.
1314 KSDbgInfo.LexicalBlocks.pop_back();
1315
1316 return nullptr;
1317}
1318
1319//===----------------------------------------------------------------------===//
1320// Top-Level parsing and JIT Driver
1321//===----------------------------------------------------------------------===//
1322
1323static void InitializeModule() {
1324 // Open a new module.
1325 TheContext = std::make_unique<LLVMContext>();
1326 TheModule = std::make_unique<Module>("my cool jit", *TheContext);
1327 TheModule->setDataLayout(TheJIT->getDataLayout());
1328
1329 Builder = std::make_unique<IRBuilder<>>(*TheContext);
1330}
1331
1332static void HandleDefinition() {
1333 if (auto FnAST = ParseDefinition()) {
1334 if (!FnAST->codegen())
1335 fprintf(stderr, "Error reading function definition:");
1336 } else {
1337 // Skip token for error recovery.
1338 getNextToken();
1339 }
1340}
1341
1342static void HandleExtern() {
1343 if (auto ProtoAST = ParseExtern()) {
1344 if (!ProtoAST->codegen())
1345 fprintf(stderr, "Error reading extern");
1346 else
1347 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1348 } else {
1349 // Skip token for error recovery.
1350 getNextToken();
1351 }
1352}
1353
1354static void HandleTopLevelExpression() {
1355 // Evaluate a top-level expression into an anonymous function.
1356 if (auto FnAST = ParseTopLevelExpr()) {
1357 if (!FnAST->codegen()) {
1358 fprintf(stderr, "Error generating code for top level expr");
1359 }
1360 } else {
1361 // Skip token for error recovery.
1362 getNextToken();
1363 }
1364}
1365
1366/// top ::= definition | external | expression | ';'
1367static void MainLoop() {
1368 while (true) {
1369 switch (CurTok) {
1370 case tok_eof:
1371 return;
1372 case ';': // ignore top-level semicolons.
1373 getNextToken();
1374 break;
1375 case tok_def:
1376 HandleDefinition();
1377 break;
1378 case tok_extern:
1379 HandleExtern();
1380 break;
1381 default:
1382 HandleTopLevelExpression();
1383 break;
1384 }
1385 }
1386}
1387
1388//===----------------------------------------------------------------------===//
1389// "Library" functions that can be "extern'd" from user code.
1390//===----------------------------------------------------------------------===//
1391
// On Windows the library functions below are explicitly exported; elsewhere
// the macro expands to nothing.
#if defined(_WIN32)
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif

/// putchard - write X, converted to a char, to stderr; always returns 0.
extern "C" DLLEXPORT double putchard(double X) {
  const char Ch = static_cast<char>(X);
  fputc(Ch, stderr);
  return 0.0;
}

/// printd - write X to stderr formatted as "%f\n"; always returns 0.
extern "C" DLLEXPORT double printd(double X) {
  fprintf(stderr, "%f\n", X);
  return 0.0;
}
1409
1410//===----------------------------------------------------------------------===//
1411// Main driver code.
1412//===----------------------------------------------------------------------===//
1413
1414int main() {
1415 InitializeNativeTarget();
1416 InitializeNativeTargetAsmPrinter();
1417 InitializeNativeTargetAsmParser();
1418
1419 // Install standard binary operators.
1420 // 1 is lowest precedence.
1421 BinopPrecedence['='] = 2;
1422 BinopPrecedence['<'] = 10;
1423 BinopPrecedence['+'] = 20;
1424 BinopPrecedence['-'] = 20;
1425 BinopPrecedence['*'] = 40; // highest.
1426
1427 // Prime the first token.
1428 getNextToken();
1429
1430 TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
1431
1432 InitializeModule();
1433
1434 // Add the current debug info version into the module.
1435 TheModule->addModuleFlag(Module::Warning, "Debug Info Version",
1436 DEBUG_METADATA_VERSION);
1437
1438 // Darwin only supports dwarf2.
1439 if (Triple(sys::getProcessTriple()).isOSDarwin())
1440 TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2);
1441
1442 // Construct the DIBuilder, we do this here because we need the module.
1443 DBuilder = std::make_unique<DIBuilder>(*TheModule);
1444
1445 // Create the compile unit for the module.
1446 // Currently down as "fib.ks" as a filename since we're redirecting stdin
1447 // but we'd like actual source locations.
1448 KSDbgInfo.TheCU = DBuilder->createCompileUnit(
1449 dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
1450 "Kaleidoscope Compiler", false, "", 0);
1451
1452 // Run the main "interpreter loop" now.
1453 MainLoop();
1454
1455 // Finalize the debug info.
1456 DBuilder->finalize();
1457
1458 // Print out all of the generated code.
1459 TheModule->print(errs(), nullptr);
1460
1461 return 0;
1462}
整体流程
初始化
- native target 相关初始化
- 运算符优先级初始化
- token 初始化(预先读入一个 token)。之所以预先读入一个 token,是为了让后面的 MainLoop 在第一次 switch 时就能拿到一个有效的 token。
然后创建 KaleidoscopeJIT 对象,并初始化 Module 和 PassManager。(注意:上面的代码清单带有调试信息,其 InitializeModule 只创建了 LLVMContext、Module 和 IRBuilder,并没有创建 PassManager。)
主循环
会进行词法分析,并在遇到特定 token 时转入语法分析,然后生成 LLVM IR,进行 JIT 操作并通过函数指针的形式调用执行代码。循环进行这一过程。
Kaleidoscope 的 JIT 基于 llvm::orc
执行引擎实现。
基本操作
LLVM 如何生成 IR 的?
LLVM 提供 IRBuilder 类用于构建 LLVM IR:调用 Builder->CreateFAdd 之类的方法,就会生成对应的指令,并返回表示该指令结果的 Value 指针。Value 是 LLVM IR 中各种值(指令、常量、函数参数等)的公共基类。
从 Function *FunctionAST::codegen()
可以看到具体的函数范围 IR 的生成操作。
- BasicBlock::Create:创建函数的 entry 块
- Builder->SetInsertPoint(BB);:设置 Builder 之后生成的指令要插入到哪个基本块
- Builder->CreateStore:为每个参数创建 Store 指令,把参数值存入对应的 alloca
- Body->codegen():创建函数体代码
- Builder->CreateRet(RetVal):创建 ret 指令,把函数体的计算结果作为返回值
LLVM 的 Pass 是如何应用到代码的?
首先我们会创建 PassManager,向其中添加各种 Pass。当生成 IR 后,通过 TheFPM->run(*TheFunction);
将 Pass 应用到函数上。
LLVM 的 JIT 的工作流程(Orc)
ResourceTracker 用于管理 JIT 的内存分配回收。
auto RT = TheJIT->getMainJITDylib().createResourceTracker();
// ...
ExitOnErr(RT->remove());
CompileLayer 是 JIT 的核心。我们通过 addModule
将当前 Module 添加到 Layer 里,然后通过 ExecutionSession->lookup
查找编译出来的 __anon_expr
符号并执行其地址表示的函数。
一些有用的资料
- https://vod.video.cornell.edu/media/CS+6120A+Lesson+6A+Writing+an+LLVM+Pass/1_4nrtmvc9/179754792
- https://www.cs.cornell.edu/~asampson/blog/llvm.html

介绍如何自定义一个 Pass