I'm back.
Thanks for the egafd movie, I'll examine it!
Code for birthday and birthplace:
procedure ParsePeople(URL : String; HTML : String);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Parses a person page and stores what it finds through AddFieldValue / AddImageURL:
//   - the genre (constant 'EGAFD') and the page URL,
//   - the "born" note, plus a birthday and a birthplace derived from that note,
//   - the pseudonyms (alternative names),
//   - a filmography text, stored in the bio field (prefixed by the "born" note if any),
//   - the photo URL.
//
// URL  : address of the page; stored unchanged in pfURL.
// HTML : source of the page; scanned with Pos / PosFrom for fixed HTML markers.
//
// NOTE(review): PartBorn, TabCountry, PhotoURL and BASE_URL are not declared in this
// procedure; they are assumed to be declared at script level - confirm.
///////////////////////////////////////////////////////////////////////////////////////////////////
var curpos, endpos, PosStart, PosEnd, debug_Pos1 : Integer;
    actPosstart, actposstart2, actPosStart3, actposend, UrlposStart, UrlposEnd, difPos : Integer;
    Pseudo, Born, Lien, URL1, Name, Title, Year, Av, Role, Notes, Note, tmpYear, OrigT : String;
    Birthday : string;
    I : integer;
///////////////////////////////////////////////////////////////////////////////////////////////////
begin
  AddFieldValue(pfGenre, 'EGAFD');

  // URL: //
  Addfieldvalue(pfURL, URL);

  // BORN: //
  // Walk the list items that follow the "Notes" header; only the last item read is kept in Born.
  curpos := Pos('<th>Notes</th>', HTML);
  endpos := curpos;
  While (curpos > 0) AND (curpos < Posfrom('</tr>', HTML, EndPos)) do begin
    endpos := curpos;
    PosStart := PosFrom('<td><ul class="list"><li>', HTML, endpos);
    PosEnd := PosFrom('</li></ul></td>', HTML, PosStart);
    // 25 = length of the opening marker '<td><ul class="list"><li>'
    Born := Trim(Copy(HTML, (PosStart + 25), (PosEnd - PosStart - 25)));
    // Capitalise the first letter and terminate the note with a carriage return.
    Born := Uppercase(Copy(Born,0,1)) + Copy(Born,2, length(Born)-1) + #13;
    //LogMessage('BORN :' + Born);
    curpos := posfrom('<td><ul class="list"><li>', HTML, posend);
    //AddFieldValue(pfBirthplace, Born);
    LogMessage('BORN :' + Born);
  end;

  /// BirthDay:
  // Split the note on '.' (#46); the part that follows a "b" / ", b" marker starts with the year.
  ExplodeString(Born, PartBorn, #46);
  If High(partBorn) > 0 then
  Begin
    logmessage('birhtday commence');
    For I := Low(partBorn) to High(partBorn) do
    Begin
      PartBorn[I] := Trim(partBorn[I]);
    End;
    For I := Low(partBorn) to High(partBorn) do
    Begin
      If (Lowercase(partBorn[I]) = 'b') OR (Copy(partBorn[I], length(partBorn[I])-2,3) = ', b') then
      Begin
        // Guard: the marker may be the last part, in which case there is no following part to read.
        If I < High(partBorn) then
          BirthDay := '01/01/' + Copy(partBorn[I+1], 0, 4);
      end;
    end;
  end;
  If BirthDay <> '' then AddFieldValue(pfBirthday, BirthDay);

  // Birthplace:
  // The first entry whose text (minus its last character) occurs in Born is stored as birthplace.
  TabCountry := ['Hungarian', 'French', 'Russian', 'Norwegian' , 'Austrian', 'Spanish', 'Bulgarian'
               , 'English', 'German', 'Belgian', 'Canadian', 'Danish', 'Italian', 'Greek'
               , 'Portuguese', 'Polish', 'Kyrgyzstan', 'Czech', 'exit'];
  For I := Low(TabCountry) to High(Tabcountry) do
  Begin
    If Pos((copy(TabCountry[I], 1 ,Length(TabCountry[I])-1)),Born) > 0 then
    Begin
      Addfieldvalue(pfBirthPlace, TabCountry[I]);
      // Stop at the first match (replaces the former assignment to the loop variable).
      Break;
    end;
  End;

  // AKA: //
  // Every 'class="acta"' span between the "Pseudonyms" and "Films" headers is an alternative name.
  curpos := Pos('<th>Pseudonyms</th>', HTML);
  endpos := curpos;
  While (curpos > 0) AND (curpos < Posfrom('<th>Films</th>', HTML, EndPos)) do begin
    endpos := curpos;
    PosStart := PosFrom('class="acta">', HTML, endpos);
    PosEnd := PosFrom('</span>', HTML, PosStart);
    // 13 = length of the marker 'class="acta">'
    Pseudo := Trim(Copy(HTML, (PosStart + 13), (PosEnd - PosStart - 13)));
    LogMessage('AKA :' + Pseudo);
    curpos := posfrom('class="acta">', HTML, posend);
    AddFieldValue(pfAltnames, Pseudo);
  end;

  //////////
  // BIO: //
  curpos := Pos('<th>Films</th>', HTML);
  LogMessage('Films readout');
  if curPos > 0 then begin
    Lien := '----- Filmographie (EGAFD) -----' + #09;
    EndPos := curPos;
    while (curPos > 0) AND (curPos < PosFrom('</ul>', HTML, EndPos)) do begin
      EndPos := curPos; // Set last position to actual position

      // Reset per-film values so a film without note/role does not inherit the previous film's.
      Notes := '';
      Role := '';

      /// Get URL: ///
      UrlPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start
      UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart); // search for url end
      URL1 := BASE_URL + Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) );
      LogMessage(URL1);

      /// Get Name: ///
      actPosStart := PosFrom('<a href="', HTML, EndPos); // search for link start
      actPosStart2 := PosFrom('">', HTML, actPosStart);
      actPosEnd := PosFrom('</a>', HTML, actPosStart2); // search for link end
      Name := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
      LogMessage(Name);
      // Drop everything from the first '(' onwards.
      debug_pos1 := Pos('(',Name);
      if debug_pos1 > 0 then
        Name := Copy(Name,0,debug_pos1-1);
      LogMessage(Name);

      /// Get Title (for movies): ///
      actPosStart := PosFrom('<a href="', HTML, EndPos); // search for link start
      actPosStart2 := PosFrom('">', HTML, actPosStart);
      actPosEnd := PosFrom('</a>', HTML, actPosStart2); // search for link end
      Title := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
      LogMessage(Title);
      debug_pos1 := Pos('(',Title);
      if debug_pos1 > 0 then
        Title := Copy(Title,0,debug_pos1-1);
      LogMessage(Title);

      /// If Original: ///
      // When the text of the next tagged element starts with 'alt', the original title is read
      // from that element; otherwise the link title is used.
      actposstart := actposEnd + 5;
      actposstart := PosFrom('">', HTML, actposstart) + 2;
      actPosEnd := PosFrom('</', HTML, actPosstart) - 1;
      If copy(HTML, actposstart, 3) = 'alt' then
        OrigT := Copy(HTML, (actPosstart + 22),(actPosEnd-actPosStart-21))
      else
        OrigT := Title;

      /// Notes: ///
      // A '<' found right after the link's '</a>' means a tagged note follows the link.
      actPosStart := PosFrom('<a href="', HTML, EndPos);
      actPosStart2 := PosFrom('</a>', HTML, actPosStart);
      Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
      logmessage('AV : ' + Av);
      If Av = '<' then begin
        actposStart2 := (actposstart2 + 5); //Step to go after "</a>"
        actposstart3 := PosFrom('>', HTML, actposStart2);
        actposend := PosFrom('<', HTML, actposstart3);
        Notes := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
        logmessage('Notes :' + Notes);
      end;
      LogMessage(Notes);

      /// Get Year & Note: ///
      tmpYear := Copy(Notes, 0, 2);
      logMessage('tmpYear :' + tmpYear);
      Case tmpYear of
        'c.' : Begin
                 Year := Copy(Notes,4,4);
                 Note := '';
               end;
        '19', '20' : Begin
                 // A 5th character 's' or '?' belongs to the year; keep it with the year and
                 // start the note two characters later.
                 If (Copy(Notes,5,1) = 's') OR (Copy(Notes,5,1) = '?') then begin
                   Year := Copy(Notes,0,5);
                   Note := Copy(Notes,7, Length(Notes)-6);
                 end
                 else begin
                   Year := Copy(Notes,0,4);
                   Note := Copy(Notes,6, Length(Notes)-5);
                 end;
               end;
        else begin
          Year := '';
          Note := Notes;
        end;
      end;
      logmessage('Year :' + Year);
      logmessage('Note :' + Note);

      /// Get Role: ///
      // Only accept an '<i>' element that starts within 200 characters of the current position.
      difpos := (PosFrom('<i>', HTML, (actposEnd-1))+4) - actposend;
      logmessage('DIFFERENCE : ' + intToStr(difpos));
      If difpos > 0 then begin
        If difpos < 200 then begin
          actPosStart := PosFrom('<i>', HTML, (actposend-1)) + 4;
          actPosEnd := PosFrom('</i></li>', HTML, actPosStart) - 1;
          Role := Trim(Copy(HTML, actposStart, (actPosEnd - actPosStart)));
          Role := StringReplace(Role, ';', ' - ', true, false, true);
          Role := StringReplace(Role, '/', ' - ', true, false, true);
          if Pos(', ', Role) = 1 then Delete(Role, 1, 2);
          LogMessage('Role: ' + Role);
          debug_pos1 := Pos('(',Role);
          if debug_pos1 > 0 then
            Role := Copy(Role,0,debug_pos1-1);
          LogMessage(Role);
        end;
      end;

      {AddPersonMovie(Trim(OrigT), '', '', Year, LowerCase(URL1), ctActors);} //Add movie in database

      /// Total Line: ///
      // One line per film: name (linked unless an original title differs), year, note, role.
      If Lien <> '' then
        Lien := Lien + #13;
      If URL1 <> '' then begin
        If OrigT <> Title then
          Lien := Lien + Name
        else
          Lien := Lien + '<link url="' + URL1 + '">' + Name + '</link>';
      end;
      If Year <> '' then
        Lien := Lien + ' • ' + Year;
      If Note <> '' then
        Lien := Lien + ' • ' + Note;
      If Role <> '' then
        Lien := Lien + ' • ' + Role;
      LogMessage('LIEN :' + Lien);
      curPos := PosFrom('<a href="', HTML, actPosEnd);
    end;
    //////////
    if (Lien <> '') AND (Born = '') then
      AddFieldValue(pfBio, Lien);
    if (Lien <> '') AND (Born <> '') then
      AddFieldValue(pfBio, Born + #13#10 + Lien);
  end;

  // Photo:
  curPos := Pos('src="/actresses/id/',HTML);
  if curPos > 0 then begin
    EndPos := PosFrom('" width', HTML, curPos);
    // + 5 skips 'src="' so the copied text starts at the path.
    PhotoURL := BASE_URL + Copy(HTML, curPos + 5, EndPos - curPos - 5);
    LogMessage('URL de la photo: '+ PhotoURL);
    {PhotoURL := HTMLToText (PhotoURL);}
    AddImageURL(4, PhotoURL);
  end
  else begin
    PhotoURL := '';
  end;
end;
For me, 'birthday' works!
We must complete the list of countries; I have certainly forgotten some!
There are some exceptions, like French-Canadian origin. This code takes only the first country and doesn't take origin into account.
I ran some tests with the URLs above and it seems to be OK.