从OS的角度来看,当一个文件创建时,大多数OS(Windows,Linux,Mac OS X)不会向磁盘写入数据,新创建的文件此时位于磁盘缓存中,之后才会真正写入磁盘。如图,日志文件位于OS磁盘缓存中,而不是位于磁盘。
上面 5步的代码的实现:
//事务指令的实现
//p1为数据库文件的索引号---0为main database;1为temporary tables使用的文件 //p2 不为0,一个写事务开始 case OP_Transaction: {
//数据库的索引号 int i = pOp->p1;
//指向数据库对应的btree Btree *pBt;
assert( i>=0 && i
assert( (p->btreeMask & (1<aDb[i].pBt;
if( pBt ){
//从这里btree开始事务,主要给文件加锁,并设置btree事务状态 rc = sqlite3BtreeBeginTrans(pBt, pOp->p2);
if( rc==SQLITE_BUSY ){ p->pc = pc;
p->rc = rc = SQLITE_BUSY;
goto vdbe_return; }
if( rc!=SQLITE_OK && rc!=SQLITE_READONLY /* && rc!=SQLITE_BUSY */ ){ goto abort_due_to_error; } }
break; }
//开始一个事务,如果第二个参数不为0,则一个写事务开始,否则是一个读事务 //如果wrflag>=2,一个exclusive事务开始,此时别的连接不能访问数据库 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ BtShared *pBt = p->pBt; int rc = SQLITE_OK;
btreeIntegrity(p);
/* If the btree is already in a write-transaction, or it
** is already in a read-transaction and a read-transaction ** is requested, this is a no-op. */
//如果b-tree处于一个写事务;或者处于一个读事务,一个读事务又请求,则返回SQLITE_OK if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ return SQLITE_OK; }
/* Write transactions are not possible on a read-only database */ //写事务不能访问只读数据库 if( pBt->readOnly && wrflag ){ return SQLITE_READONLY; }
/* If another database handle has already opened a write transaction ** on this shared-btree structure and a second write transaction is ** requested, return SQLITE_BUSY. */
//如果数据库已存在一个写事务,则该写事务请求时返回SQLITE_BUSY if( pBt->inTransaction==TRANS_WRITE && wrflag ){ return SQLITE_BUSY; }
do {
//如果数据库对应btree的第一个页面还没读进内存 //则把该页面读进内存,数据库也相应的加read lock
if( pBt->pPage1==0 ){
//加read lock,并读页面到内存 rc = lockBtree(pBt); }
if( rc==SQLITE_OK && wrflag ){
//对数据库文件加RESERVED_LOCK锁
rc = sqlite3pager_begin(pBt->pPage1->aData, wrflag>1); if( rc==SQLITE_OK ){
rc = newDatabase(pBt); } }
if( rc==SQLITE_OK ){
if( wrflag ) pBt->inStmt = 0; }else{
unlockBtreeIfUnused(pBt); }
}while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && sqlite3InvokeBusyHandler(pBt->pBusyHandler) );
if( rc==SQLITE_OK ){
if( p->inTrans==TRANS_NONE ){ //btree的事务数加1 pBt->nTransaction++; }
//设置btree事务状态
p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); if( p->inTrans>pBt->inTransaction ){ pBt->inTransaction = p->inTrans; } }
btreeIntegrity(p); return rc; } /*
**获取数据库的写锁,发生以下情况时去除写锁: ** * sqlite3pager_commit() is called. ** * sqlite3pager_rollback() is called. ** * sqlite3pager_close() is called.
** * sqlite3pager_unref() is called to on every outstanding page. ** pData指向数据库的打开的页面,此时并不修改,仅仅只是获取
** 相应的pager,检查它是否处于read-lock状态。 **如果打开的不是临时文件,则打开日志文件. **如果数据库已经处于写状态,则do nothing */
int sqlite3pager_begin(void *pData, int exFlag){ PgHdr *pPg = DATA_TO_PGHDR(pData); Pager *pPager = pPg->pPager; int rc = SQLITE_OK; assert( pPg->nRef>0 );
assert( pPager->state!=PAGER_UNLOCK ); //pager已经处于share状态
if( pPager->state==PAGER_SHARED ){ assert( pPager->aInJournal==0 ); if( MEMDB ){
pPager->state = PAGER_EXCLUSIVE; pPager->origDbSize = pPager->dbSize; }else{
//对文件加 RESERVED_LOCK
rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); if( rc==SQLITE_OK ){
//设置pager的状态
pPager->state = PAGER_RESERVED; if( exFlag ){
rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } }
if( rc!=SQLITE_OK ){ return rc; }
pPager->dirtyCache = 0;
TRACE2(\ //使用日志,不是临时文件,则打开日志文件 if( pPager->useJournal && !pPager->tempFile ){
//为pager打开日志文件,pager应该处于RESERVED或EXCLUSIVE状态 //会向日志文件写入header rc = pager_open_journal(pPager); } } }
return rc; }
//创建日志文件,pager应该处于RESERVED或EXCLUSIVE状态
static int pager_open_journal(Pager *pPager){ int rc;
assert( !MEMDB );
assert( pPager->state>=PAGER_RESERVED ); assert( pPager->journalOpen==0 ); assert( pPager->useJournal ); assert( pPager->aInJournal==0 ); sqlite3pager_pagecount(pPager);
//日志文件页面位图
pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 ); if( pPager->aInJournal==0 ){ rc = SQLITE_NOMEM;
goto failed_to_open_journal; }
//打开日志文件
rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd, pPager->tempFile); //日志文件的位置指针 pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; if( rc!=SQLITE_OK ){
goto failed_to_open_journal; }
/*一般来说,os此时创建的文件位于磁盘缓存,并没有实际 **存在于磁盘,下面三个操作就是为了把结果写入磁盘,而对于 **windows系统来说,并没有提供相应API,所以实际上没有意义. */
//fullSync操作对windows没有意义
sqlite3OsSetFullSync(pPager->jfd, pPager->full_fsync); sqlite3OsSetFullSync(pPager->fd, pPager->full_fsync);
/* Attempt to open a file descriptor for the directory that contains a file. **This file descriptor can be used to fsync() the directory
**in order to make sure the creation of a new file is actually written to disk. */
sqlite3OsOpenDirectory(pPager->jfd, pPager->zDirectory); pPager->journalOpen = 1; pPager->journalStarted = 0; pPager->needSync = 0;
pPager->alwaysRollback = 0; pPager->nRec = 0; if( pPager->errCode ){ rc = pPager->errCode;
goto failed_to_open_journal;