当前位置:文档之家› 提取文本里的数字文本

提取文本里的数字文本

一次性统统提取——提取文本里的数字文本
作者:小雨
所谓数字文本,就是1、2、3、4这样的单个或连续数字(包括0.8、1.2这样的数字),但不包括IP地址那种有多个小数点的文字(如有需要,请自行修改)。这些数字通常隐藏在众多非数字的文本文件或者字符串中。这里可以一次性把他们全部提取出来放入列表里,列表里的Strings部分保存原始文本,Values里保存转化成数字的变量。使用起来与TStringList风格相似,留给有这种需要的人。

有一个特例:对于单一的“.”符号,也会被收录(我在类里没有处理,因为这种写法,有可能就是懒人0.0的简写,练手的代码,不考虑这么多),如果不喜欢这样的处理,可以自行修改。

unit NumFromStr;

interface

uses
Classes, SysUtils;

type
PSeriesItem = ^TSeriesItem;
TSeriesItem = record
FNumString: string;
FNumValue : Extended;
end;

PSeriesItemList = ^TSeriesItemList;
TSeriesItemList = array[0..Maxint div 64] of TSeriesItem;

TPickNumStringHelper = class(TObject)
private
FList: PSeriesItemList;
FCount: Integer;
FCapacity: Integer;
procedure Error(Index: Integer);
procedure Grow;
function GetIntegerValue(Index: Integer): Integer;
function StartNumber(const S: string):Integer;
function StopNumber(const S: string):Integer;
protected
function Get(Index: Integer): string;virtual;
function GetCapacity: Integer;virtual;
function GetCount: Integer;virtual;
function GetValue(Index: Integer): Extended; virtual;
procedure ParseTextStr(const AnyString: string); virtual;
procedure Put(Index: Integer; const NumString: string); virtual;
function SepValue(const S: string;const lPos:integer; var VarStr: string):Integer;virtual;
procedure SetValue(Index: Integer; NumValue: Extended); virtual;
procedure SetCapacity(NewCapacity: Integer);virtual;
procedure InsertItem(Index: Integer;const NumString: string; NumValue: Extended); virtual;
public
destructor Destroy; override;
procedure Clear;
function Add(const AnyString: string): Integer;
function AddNumValue(const NumString: string; NumValue: Extended): Integer;
procedure LoadFromFile(const FileName: string);
procedure LoadFromStream(Stream: TStream);
property Count: Integer read GetCount;
property ValsToInt[Index: Integer]: Integer read GetIntegerValue;
property Values[Index: Integer]: Extended read GetValue write SetValue;
property Strings[Index: Integer]: string read Get write Put; default;
end;

implementation


// #################################

#############################
// # #
// # ++ Write by 小雨哥 #
// # #
// ##############################################################

{ TPickNumStringHelper }

destructor TPickNumStringHelper.Destroy;
begin
if FCount <> 0 then Finalize(FList^[0], FCount);
FCount := 0;
SetCapacity(0);
inherited Destroy;
end;

function TPickNumStringHelper.Add(const AnyString: string): Integer;
var
I, J: Integer;
VarStr,temp:string;
FNumValue: Extended;
begin
Result := 1;
temp := AnyString;

while true do
begin
I := StartNumber(temp);
if I < Length(AnyString) then
begin
J := SepValue(temp, I, VarStr);
if J > 1 then
begin
/// FNumValue: "." value auto to 0
if TryStrToFloat(VarStr, FNumValue) then
Result := AddNumValue(VarStr, FNumValue);
end else Break;
temp := Copy(temp, I + j - 1, MaxInt);
end
else Break;
end;
end;

function TPickNumStringHelper.AddNumValue(const NumString: string; NumValue: Extended): Integer;
begin
Result := FCount;
/// This is a faster add action, not test it!!! add is free.
InsertItem(Result, NumString, NumValue);
end;

procedure TPickNumStringHelper.Clear;
begin
if FCount <> 0 then
begin
Finalize(FList^[0], FCount);
FCount := 0;
SetCapacity(0);
end;
end;

procedure TPickNumStringHelper.Error(Index: Integer);
function ReturnAddr: Pointer;
asm
MOV EAX,[EBP+4]
end;
const
IndexError = 'index out of bounds (%d)';
begin
raise Exception.CreateFmt(IndexError, [Index]) at ReturnAddr;
end;

function TPickNumStringHelper.Get(Index: Integer): string;
begin
if (Index < 0) or (Index >= FCount) then
Error(Index)
else
Result := FList^[Index].FNumString;
end;

function TPickNumStringHelper.GetCapacity: Integer;
begin
Result := FCapacity;
end;

function TPickNumStringHelper.GetCount: Integer;
begin
Result := FCount;
end;

function TPickNumStringHelper.GetIntegerValue(Index: Integer): Integer;
begin
Result := Round(GetValue(Index));
end;

function TPickNumStringHelper.GetValue(Index: Integer): Extended;
begin
Result := 0.0;
if (Index < 0) or (Index >= FCount) then
Error(Index)
else
Result := FList^[Index].FNumValue;
end;

procedure TPickNumStringHelper.Grow;
var
Delta: Integer;
begin
if FCapacity > 128 then D

elta := FCapacity div 4 else
if FCapacity > 32 then Delta := 16 else Delta := 4;
SetCapacity(FCapacity + Delta);
end;

procedure TPickNumStringHelper.InsertItem(Index: Integer;const NumString: string; NumValue: Extended);
begin
if FCount = FCapacity then Grow;
if Index < FCount then
System.Move(FList^[Index], FList^[Index + 1], (FCount - Index) * SizeOf(TSeriesItem));
with FList^[Index] do
begin
Pointer(FNumString) := nil;
FNumValue := NumValue;
FNumString := NumString;
end;
Inc(FCount);
end;

procedure TPickNumStringHelper.LoadFromFile(const FileName: string);
var
Stream: TStream;
begin
Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
try
LoadFromStream(Stream);
finally
Stream.Free;
end;
end;

procedure TPickNumStringHelper.LoadFromStream(Stream: TStream);
var
Size: Integer;
S: string;
begin
Size := Stream.Size - Stream.Position;
SetString(S, nil, Size);
Stream.Read(Pointer(S)^, Size);
ParseTextStr(S);
end;

procedure TPickNumStringHelper.Put(Index: Integer; const NumString: string);
var
NumValue: Extended;
begin
if (Index < 0) or (Index >= FCount) then
Error(Index)
else
begin
if TryStrToFloat(NumString, NumValue) then
begin
FList^[Index].FNumString := NumString;
FList^[Index].FNumValue := NumValue;
end;
end;
end;

function TPickNumStringHelper.StartNumber(const S: string): Integer;
begin
Result := 1;
while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.'])) do
Inc(Result);
end;

function TPickNumStringHelper.StopNumber(const S: string): Integer;
begin
Result := 1;
while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.']) do
Inc(Result);
end;

function TPickNumStringHelper.SepValue(const S: string; const lPos: integer; var VarStr: string): Integer;
begin
VarStr := Copy(S, lPos, MaxInt);
Result := StopNumber(VarStr);
if Result > 1 then
begin
VarStr := Copy(S, lPos, Result - 1);

end
else VarStr := '';
end;

procedure TPickNumStringHelper.SetCapacity(NewCapacity: Integer);
begin
ReallocMem(FList, NewCapacity * SizeOf(TSeriesItem));
FCapacity := NewCapacity;
end;

procedure TPickNumStringHelper.ParseTextStr(const AnyString: string);
var
P, Start: PChar;
S: string;
begin
Clear;
P := Pointer(AnyString);
if P <> nil then
begin
while P^ <> #0 do
begin
Start := P;
while not (P^ in [#0, #10, #13]) do Inc(P);
SetString(S, Start, P - Start);
Add(S);
if P^ = #13 then Inc(P);
if P^ = #10 then Inc(P);

end;
end;
end;

procedure TPickNumStringHelper.SetValue(Index: Integer; NumValue: Extended);
begin
if (Index < 0) or (Index >= FCount) then
Error(Index)
else
begin
FList^[Index].FNumString := FloatToStr(NumValue);
FList^[Index].FNumValue := NumValue;
end;
end;

end.

代码没有充分测试,请有心人斧正。

小雨哥 (2010-5-6 23:57:50)
上面代码的提取核心代码如下,如果只是很少量的提取,可以直接使用:

/// 定位到数字文本开头
function ToNumberHelper(const S: string):Integer;
begin
Result := 1;
while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.'])) do
Inc(Result);
end;

/// 定位到数字文本结尾
function ToStringHelper(const S: string):Integer;
begin
Result := 1;
while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.']) do
Inc(Result);
end;

/// 找到一段提取一段
function GetNumValueHelper(const S: string;const lPos:integer; var VarStr: string):Integer;
begin
VarStr := Copy(S, lPos, MaxInt);
Result := ToStringHelper(VarStr);
if Result > 1 then
VarStr := Copy(S, lPos, Result - 1);
end;

/// 主函数,把文本的数字文本一个个取出来,中间用“|”符号分隔
function ParseStrToSeparatorValue(const S: string; FDelimiter: Char = '|'): string;
var
I, J: Integer;
NumStr,temp:string;
begin
temp := s;
Result := '';

while true do
begin
I := ToNumberHelper(temp);
if I < Length(S) then
begin
J := GetNumValueHelper(temp, I, NumStr);
if J > 1 then
begin
if Result ='' then
Result := NumStr
else
Result := Result + FDelimiter + NumStr;
end else Break;
temp := Copy(temp, I + j - 1, MaxInt);
end
else Break;
end;
end;


相关主题
文本预览
相关文档 最新文档