Codebase list unrar-nonfree / 2ad52506-5b8c-4338-a489-6e10246410af/main scantree.cpp
2ad52506-5b8c-4338-a489-6e10246410af/main

Tree @2ad52506-5b8c-4338-a489-6e10246410af/main (Download .tar.gz)

scantree.cpp @2ad52506-5b8c-4338-a489-6e10246410af/mainraw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
#include "rar.hpp"

ScanTree::ScanTree(StringList *FileMasks,RECURSE_MODE Recurse,bool GetLinks,SCAN_DIRS GetDirs)
{
  ScanTree::FileMasks=FileMasks;
  ScanTree::Recurse=Recurse;
  ScanTree::GetLinks=GetLinks;
  ScanTree::GetDirs=GetDirs;

  ScanEntireDisk=false;
  FolderWildcards=false;

  SetAllMaskDepth=0;
  *CurMask=0;
  memset(FindStack,0,sizeof(FindStack));
  Depth=0;
  Errors=0;
  *ErrArcName=0;
  Cmd=NULL;
  ErrDirList=NULL;
  ErrDirSpecPathLength=NULL;
}


ScanTree::~ScanTree()
{
  for (int I=Depth;I>=0;I--)
    if (FindStack[I]!=NULL)
      delete FindStack[I];
}


SCAN_CODE ScanTree::GetNext(FindData *FD)
{
  if (Depth<0)
    return SCAN_DONE;

#ifndef SILENT
  uint LoopCount=0;
#endif

  SCAN_CODE FindCode;
  while (1)
  {
    if (*CurMask==0 && !GetNextMask())
      return SCAN_DONE;

#ifndef SILENT
    // Let's return some ticks to system or WinRAR can become irresponsible
    // while scanning files in command like "winrar a -r arc c:\file.ext".
    // Also we reset system sleep timer here.
    if ((++LoopCount & 0x3ff)==0)
      Wait();
#endif

    FindCode=FindProc(FD);
    if (FindCode==SCAN_ERROR)
    {
      Errors++;
      continue;
    }
    if (FindCode==SCAN_NEXT)
      continue;
    if (FindCode==SCAN_SUCCESS && FD->IsDir && GetDirs==SCAN_SKIPDIRS)
      continue;
    if (FindCode==SCAN_DONE && GetNextMask())
      continue;
    if (FilterList.ItemsCount()>0 && FindCode==SCAN_SUCCESS)
      if (!CommandData::CheckArgs(&FilterList,FD->IsDir,FD->Name,false,MATCH_WILDSUBPATH))
        continue;
    break;
  }
  return FindCode;
}


// For masks like dir1\dir2*\*.ext in non-recursive mode.
bool ScanTree::ExpandFolderMask()
{
  bool WildcardFound=false;
  uint SlashPos=0;
  for (int I=0;CurMask[I]!=0;I++)
  {
    if (CurMask[I]=='?' || CurMask[I]=='*')
      WildcardFound=true;
    if (WildcardFound && IsPathDiv(CurMask[I]))
    {
      // First path separator position after folder wildcard mask.
      // In case of dir1\dir2*\dir3\name.ext mask it may point not to file
      // name, so we cannot use PointToName() here.
      SlashPos=I; 
      break;
    }
  }

  wchar Mask[NM];
  wcsncpyz(Mask,CurMask,ASIZE(Mask));
  Mask[SlashPos]=0;

  // Prepare the list of all folders matching the wildcard mask.
  ExpandedFolderList.Reset();
  FindFile Find;
  Find.SetMask(Mask);
  FindData FD;
  while (Find.Next(&FD))
    if (FD.IsDir)
    {
      wcsncatz(FD.Name,CurMask+SlashPos,ASIZE(FD.Name));

      // Treat dir*\* or dir*\*.* as dir, so empty 'dir' is also matched
      // by such mask. Skipping empty dir with dir*\*.* confused some users.
      wchar *LastMask=PointToName(FD.Name);
      if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0)
        RemoveNameFromPath(FD.Name);

      ExpandedFolderList.AddString(FD.Name);
    }
  if (ExpandedFolderList.ItemsCount()==0)
    return false;
  // Return the first matching folder name now.
  ExpandedFolderList.GetString(CurMask,ASIZE(CurMask));
  return true;
}


// For masks like dir1\dir2*\file.ext this function sets 'dir1' recursive mask
// and '*\dir2*\file.ext' filter. Masks without folder wildcards are
// returned as is.
bool ScanTree::GetFilteredMask()
{
  // If we have some matching folders left for non-recursive folder wildcard
  // mask, we return it here.
  if (ExpandedFolderList.ItemsCount()>0 && ExpandedFolderList.GetString(CurMask,ASIZE(CurMask)))
    return true;

  FolderWildcards=false;
  FilterList.Reset();
  if (!FileMasks->GetString(CurMask,ASIZE(CurMask)))
    return false;

  // Check if folder wildcards present.
  bool WildcardFound=false;
  uint FolderWildcardCount=0;
  uint SlashPos=0;
  uint StartPos=0;
#ifdef _WIN_ALL // Not treat the special NTFS \\?\d: path prefix as a wildcard.
  if (CurMask[0]=='\\' && CurMask[1]=='\\' && CurMask[2]=='?' && CurMask[3]=='\\')
    StartPos=4;
#endif
  for (uint I=StartPos;CurMask[I]!=0;I++)
  {
    if (CurMask[I]=='?' || CurMask[I]=='*')
      WildcardFound=true;
    if (IsPathDiv(CurMask[I]) || IsDriveDiv(CurMask[I]))
    {
      if (WildcardFound)
      {
        // Calculate a number of folder wildcards in current mask.
        FolderWildcardCount++;
        WildcardFound=false;
      }
      if (FolderWildcardCount==0)
        SlashPos=I; // Slash position before first folder wildcard mask.
    }
  }
  if (FolderWildcardCount==0)
    return true;
  FolderWildcards=true; // Global folder wildcards flag.

  // If we have only one folder wildcard component and -r is missing or -r-
  // is specified, prepare matching folders in non-recursive mode.
  // We assume -r for masks like dir1*\dir2*\file*, because it is complicated
  // to fast find them using OS file find API call.
  if ((Recurse==RECURSE_NONE || Recurse==RECURSE_DISABLE) && FolderWildcardCount==1)
    return ExpandFolderMask();

  wchar Filter[NM];
  // Convert path\dir*\ to *\dir filter to search for 'dir' in all 'path' subfolders.
  wcsncpyz(Filter,L"*",ASIZE(Filter));
  AddEndSlash(Filter,ASIZE(Filter));
  // SlashPos might point or not point to path separator for masks like 'dir*', '\dir*' or 'd:dir*'
  wchar *WildName=IsPathDiv(CurMask[SlashPos]) || IsDriveDiv(CurMask[SlashPos]) ? CurMask+SlashPos+1 : CurMask+SlashPos;
  wcsncatz(Filter,WildName,ASIZE(Filter));

  // Treat dir*\* or dir*\*.* as dir\, so empty 'dir' is also matched
  // by such mask. Skipping empty dir with dir*\*.* confused some users.
  wchar *LastMask=PointToName(Filter);
  if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0)
    *LastMask=0;

  FilterList.AddString(Filter);

  bool RelativeDrive=IsDriveDiv(CurMask[SlashPos]);
  if (RelativeDrive)
    SlashPos++; // Use "d:" instead of "d" for d:* mask.

  CurMask[SlashPos]=0;

  if (!RelativeDrive) // Keep d: mask as is, not convert to d:\*
  {
    // We need to append "\*" both for -ep1 to work correctly and to
    // convert d:\* masks previously truncated to d: back to original form.
    AddEndSlash(CurMask,ASIZE(CurMask));
    wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
  }
  return true;
}


bool ScanTree::GetNextMask()
{
  if (!GetFilteredMask())
    return false;
#ifdef _WIN_ALL
  UnixSlashToDos(CurMask,CurMask,ASIZE(CurMask));
#endif

  // We wish to scan entire disk if mask like c:\ is specified
  // regardless of recursion mode. Use c:\*.* mask when need to scan only 
  // the root directory.
  ScanEntireDisk=IsDriveLetter(CurMask) && IsPathDiv(CurMask[2]) && CurMask[3]==0;

  wchar *Name=PointToName(CurMask);
  if (*Name==0)
    wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
  if (Name[0]=='.' && (Name[1]==0 || Name[1]=='.' && Name[2]==0))
  {
    AddEndSlash(CurMask,ASIZE(CurMask));
    wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
  }
  SpecPathLength=Name-CurMask;
  Depth=0;

  wcsncpyz(OrigCurMask,CurMask,ASIZE(OrigCurMask));

  return true;
}


SCAN_CODE ScanTree::FindProc(FindData *FD)
{
  if (*CurMask==0)
    return SCAN_NEXT;
  bool FastFindFile=false;
  
  if (FindStack[Depth]==NULL) // No FindFile object for this depth yet.
  {
    bool Wildcards=IsWildcard(CurMask);

    // If we have a file name without wildcards, we can try to use
    // FastFind to optimize speed. For example, in Unix it results in
    // stat call instead of opendir/readdir/closedir.
    bool FindCode=!Wildcards && FindFile::FastFind(CurMask,FD,GetLinks);

    // Link check is important for NTFS, where links can have "Directory"
    // attribute, but we do not want to recurse to them in "get links" mode.
    bool IsDir=FindCode && FD->IsDir && (!GetLinks || !FD->IsLink);

    // SearchAll means that we'll use "*" mask for search, so we'll find
    // subdirectories and will be able to recurse into them.
    // We do not use "*" for directories at any level or for files
    // at top level in recursion mode. We always comrpess the entire directory
    // if folder wildcard is specified.
    bool SearchAll=!IsDir && (Depth>0 || Recurse==RECURSE_ALWAYS ||
                   FolderWildcards && Recurse!=RECURSE_DISABLE || 
                   Wildcards && Recurse==RECURSE_WILDCARDS || 
                   ScanEntireDisk && Recurse!=RECURSE_DISABLE);
    if (Depth==0)
      SearchAllInRoot=SearchAll;
    if (SearchAll || Wildcards)
    {
      // Create the new FindFile object for wildcard based search.
      FindStack[Depth]=new FindFile;

      wchar SearchMask[NM];
      wcsncpyz(SearchMask,CurMask,ASIZE(SearchMask));
      if (SearchAll)
        SetName(SearchMask,MASKALL,ASIZE(SearchMask));
      FindStack[Depth]->SetMask(SearchMask);
    }
    else
    {
      // Either we failed to fast find or we found a file or we found
      // a directory in RECURSE_DISABLE mode, so we do not need to scan it.
      // We can return here and do not need to process further.
      // We need to process further only if we fast found a directory.
      if (!FindCode || !IsDir || Recurse==RECURSE_DISABLE)
      {
         // Return SCAN_SUCCESS if we found a file.
        SCAN_CODE RetCode=SCAN_SUCCESS;

        if (!FindCode)
        {
          // Return SCAN_ERROR if problem is more serious than just
          // "file not found".
          RetCode=FD->Error ? SCAN_ERROR:SCAN_NEXT;

          // If we failed to find an object, but our current mask is excluded,
          // we skip this object and avoid indicating an error.
          if (Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true))
            RetCode=SCAN_NEXT;
          else
          {
            ErrHandler.OpenErrorMsg(ErrArcName,CurMask);
            // User asked to return RARX_NOFILES and not RARX_OPEN here.
            ErrHandler.SetErrorCode(RARX_NOFILES);
          }
        }

        // If we searched only for one file or directory in "fast find" 
        // (without a wildcard) mode, let's set masks to zero, 
        // so calling function will know that current mask is used 
        // and next one must be read from mask list for next call.
        // It is not necessary for directories, because even in "fast find"
        // mode, directory recursing will quit by (Depth < 0) condition,
        // which returns SCAN_DONE to calling function.
        *CurMask=0;

        return RetCode;
      }

      // We found a directory using only FindFile::FastFind function.
      FastFindFile=true;
    }
  }

  if (!FastFindFile && !FindStack[Depth]->Next(FD,GetLinks))
  {
    // We cannot find anything more in directory either because of
    // some error or just as result of all directory entries already read.

    bool Error=FD->Error;
    if (Error)
      ScanError(Error);

    wchar DirName[NM];
    *DirName=0;

    // Going to at least one directory level higher.
    delete FindStack[Depth];
    FindStack[Depth--]=NULL;
    while (Depth>=0 && FindStack[Depth]==NULL)
      Depth--;
    if (Depth < 0)
    {
      // Directories scanned both in normal and FastFindFile mode,
      // finally exit from scan here, by (Depth < 0) condition.

      if (Error)
        Errors++;
      return SCAN_DONE;
    }

    wchar *Slash=wcsrchr(CurMask,CPATHDIVIDER);
    if (Slash!=NULL)
    {
      wchar Mask[NM];
      wcsncpyz(Mask,Slash,ASIZE(Mask));
      if (Depth<SetAllMaskDepth)
        wcsncpyz(Mask+1,PointToName(OrigCurMask),ASIZE(Mask)-1);
      *Slash=0;
      wcsncpyz(DirName,CurMask,ASIZE(DirName));
      wchar *PrevSlash=wcsrchr(CurMask,CPATHDIVIDER);
      if (PrevSlash==NULL)
        wcsncpyz(CurMask,Mask+1,ASIZE(CurMask));
      else
      {
        *PrevSlash=0;
        wcsncatz(CurMask,Mask,ASIZE(CurMask));
      }
    }
    if (GetDirs==SCAN_GETDIRSTWICE &&
        FindFile::FastFind(DirName,FD,GetLinks) && FD->IsDir)
    {
      FD->Flags|=FDDF_SECONDDIR;
      return Error ? SCAN_ERROR:SCAN_SUCCESS;
    }
    return Error ? SCAN_ERROR:SCAN_NEXT;
  }

  // Link check is required for NTFS links, not for Unix.
  if (FD->IsDir && (!GetLinks || !FD->IsLink))
  {
    // If we found the directory in top (Depth==0) directory
    // and if we are not in "fast find" (directory name only as argument)
    // or in recurse (SearchAll was set when opening the top directory) mode,
    // we do not recurse into this directory. We either return it by itself
    // or skip it.
    if (!FastFindFile && Depth==0 && !SearchAllInRoot)
      return GetDirs==SCAN_GETCURDIRS ? SCAN_SUCCESS:SCAN_NEXT;

    // Let's check if directory name is excluded, so we do not waste
    // time searching in directory, which will be excluded anyway.
    if (Cmd!=NULL && (Cmd->ExclCheck(FD->Name,true,false,false) ||
        Cmd->ExclDirByAttr(FD->FileAttr)))
    {
      // If we are here in "fast find" mode, it means that entire directory
      // specified in command line is excluded. Then we need to return
      // SCAN_DONE to go to next mask and avoid the infinite loop
      // in GetNext() function. Such loop would be possible in case of
      // SCAN_NEXT code and "rar a arc dir -xdir" command.

      return FastFindFile ? SCAN_DONE:SCAN_NEXT;
    }
    
    wchar Mask[NM];

    wcsncpyz(Mask,FastFindFile ? MASKALL:PointToName(CurMask),ASIZE(Mask));
    wcsncpyz(CurMask,FD->Name,ASIZE(CurMask));

    if (wcslen(CurMask)+wcslen(Mask)+1>=NM || Depth>=MAXSCANDEPTH-1)
    {
      uiMsg(UIERROR_PATHTOOLONG,CurMask,SPATHDIVIDER,Mask);
      return SCAN_ERROR;
    }

    AddEndSlash(CurMask,ASIZE(CurMask));
    wcsncatz(CurMask,Mask,ASIZE(CurMask));

    Depth++;

    // We need to use OrigCurMask for depths less than SetAllMaskDepth
    // and "*" for depths equal or larger than SetAllMaskDepth.
    // It is important when "fast finding" directories at Depth > 0.
    // For example, if current directory is RootFolder and we compress
    // the following directories structure:
    //   RootFolder
    //     +--Folder1
    //     |  +--Folder2
    //     |  +--Folder3
    //     +--Folder4
    // with 'rar a -r arcname Folder2' command, rar could add not only
    // Folder1\Folder2 contents, but also Folder1\Folder3 if we were using
    // "*" mask at all levels. We need to use "*" mask inside of Folder2,
    // but return to "Folder2" mask when completing scanning Folder2.
    // We can rewrite SearchAll expression above to avoid fast finding
    // directories at Depth > 0, but then 'rar a -r arcname Folder2'
    // will add the empty Folder2 and do not add its contents.

    if (FastFindFile)
      SetAllMaskDepth=Depth;
  }
  if (!FastFindFile && !CmpName(CurMask,FD->Name,MATCH_NAMES))
    return SCAN_NEXT;

  return SCAN_SUCCESS;
}


void ScanTree::ScanError(bool &Error)
{
#ifdef _WIN_ALL
  if (Error)
  {
    // Get attributes of parent folder and do not display an error
    // if it is reparse point. We cannot scan contents of standard
    // Windows reparse points like "C:\Documents and Settings"
    // and we do not want to issue numerous useless errors for them.
    // We cannot just check FD->FileAttr here, it can be undefined
    // if we process "folder\*" mask or if we process "folder" mask,
    // but "folder" is inaccessible.
    wchar *Slash=PointToName(CurMask);
    if (Slash>CurMask)
    {
      *(Slash-1)=0;
      DWORD Attr=GetFileAttributes(CurMask);
      *(Slash-1)=CPATHDIVIDER;
      if (Attr!=0xffffffff && (Attr & FILE_ATTRIBUTE_REPARSE_POINT)!=0)
        Error=false;
    }

    // Do not display an error if we cannot scan contents of
    // "System Volume Information" folder. Normally it is not accessible.
    if (wcsstr(CurMask,L"System Volume Information\\")!=NULL)
      Error=false;
  }
#endif

  if (Error && Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true))
    Error=false;

  if (Error)
  {
    if (ErrDirList!=NULL)
      ErrDirList->AddString(CurMask);
    if (ErrDirSpecPathLength!=NULL)
      ErrDirSpecPathLength->Push((uint)SpecPathLength);
    wchar FullName[NM];
    // This conversion works for wildcard masks too.
    ConvertNameToFull(CurMask,FullName,ASIZE(FullName));
    uiMsg(UIERROR_DIRSCAN,FullName);
    ErrHandler.SysErrMsg();
  }
}