use strict; use warnings; use feature 'say'; use v5.26; use feature qw(signatures); no warnings qw(experimental::signatures); use HTML::TableExtract; use Mojo::UserAgent; use Mojo::JSON qw(j encode_json); use Excel::Writer::XLSX; use Text::CSV; use DDP; use SQL::Abstract::Pg; use Time::Piece; # use Net::SSH::Tunnel; use lib '/mnt/vol/dev/pc/Packages/'; use sqlConn; # init SSH Tunnel # system ("ssh -fNT -L 5433:localhost:5432 ubuntu\@18.214.5.20 -i /home/superfly/.ssh/nehantic-dev.pem -p 2222"); #$initialize DB my $dbh = sqlConn->new( db => 'pc' )->dbh; my $sql = SQL::Abstract->new(); my $sql_tbl = 'pc.opportunity'; my $base_url = 'http://www.mybidmatch.com'; my $bm_home = 'http://www.mybidmatch.com/go?sub=058FAE4A-456C-4FF5-BA74-8EDD033DE87D'; ##find previous my ($stmt, @bind) = $sql->select($sql_tbl, 'notif_url'); my $sth = $dbh->prepare($stmt); $sth->execute(@bind); my $prev = $sth->fetchall_hashref('notif_url'); my $bm = extract_bidmatch($bm_home); for my $day (@$bm) { "a" =~ /a/; ## Reset captures to undef. my $url = $day->[0]; my $date = $day->[1]; next if $prev->{$url}; my $results = get_results($url); map {push @$_, $url} @$results; map {push @$_, $date} @$results; my $sam_data = get_sam_data($results); insert_rows($results); } refresh_report(); sub refresh_report() { my $sql = q'INSERT INTO pc.opportunity_report SELECT 1, e.* FROM pc.opportunity_extract e LEFT JOIN pc.opportunity_report r USING (id) WHERE r.id IS NULL'; $dbh->do($sql); } sub get_results($url) { my $ua = Mojo::UserAgent->new; my $res = $ua->get($url)->result->to_string; return if $res =~ /No articles/; $res =~ /(?(.|\n)*?<\/table>)/gm; return extract_table($1); } sub extract_table($html) { my $te = HTML::TableExtract->new(keep_html => 1, headers => [qw(# Source Agency FSG Title Keywords)]); $te->parse($html); my @results; # Examine all matching tables for my $ts ($te->tables) { for my $row ($ts->rows) { $row->[4] =~ s/href=\"/href=\"$base_url/; $row->[4] =~ /\s+?(.*)<\/a>/; $row->[4] = $2; $row->[6] = $1; push(@results, $row); } } return \@results } sub get_sam_data ($results) { for my $row (@$results) { my $bidmatch_link = $row->[6]; my $sam_link = get_doc($bidmatch_link); $sam_link =~ m|https://beta.sam.gov/opp/(.*)/view|; my $sam_json_link = "https://beta.sam.gov/api/prod/opps/v2/opportunities/$1"; if ($sam_link) { my $sam_json = get_sam_json($sam_json_link); push @$row, encode_json ($sam_json); } } } sub insert_rows ($results) { my $sam_link = $results->[6]; my @headers = qw(id source agency fsg title keywords oppy_url notif_url date oppy_det); my %data; my %psc_map = ( A => 'Research and Development', B => 'Special Studies and Analyses - Not R&D', C => 'Architect and Engineering - Construction', D => 'Automatic Data Processing and Telecommunication', E => 'Purchase of Structures and Facilities', F => 'Natural Resources and Conservation', G => 'Social Services', H => 'Quality Control, Testing, and Inspection', J => 'Maintenance, Repair, and Rebuilding of Equipment', K => 'Modification of Equipment', L => 'Technical Representative', M => 'Operation of Government Owned Facilities', N => 'Installation of Equipment', P => 'Salvage Services', Q => 'Medical Services', R => 'Professional, Administrative and Management Support', S => 'Utilities and Housekeeping Services', T => 'Photographic, Mapping, Printing, and Publications', U => 'Education and Training', V => 'Transportation, Travel and Relocation', W => 'Lease or Rental of Equipment', X => 'Lease or Rental of Facilities', Y => 'Construction of Structures and Facilities', Z => 'Maintenance, Repair or Alteration of Real Property',); for my $row (@$results) { @data{@headers} = @$row; my %ins_data = ( notif_source => $data{'source'}, notif_date => $data{'date'}, notif_url => $data{'notif_url'}, notif_details => $data{'title'}, oppty_desc => $psc_map{$data{'fsg'}}, oppty_source => $data{'agency'}, oppty_date => '', oppty_url => $data{'oppy_url'}, oppty_details => $data{'oppy_det'}, ); my ($stmt, @bind) = $sql->insert($sql_tbl, \%ins_data, {on_conflict => \'do nothing'}); my $sth = $dbh->prepare($stmt); $sth->execute(@bind); } } sub extract_bidmatch($url) { my $ua = Mojo::UserAgent->new; my $res = $ua->get($url)->result->to_string; $res =~ /(?(.|\n)*?<\/table>)/gm; my $html = $1; my $te = HTML::TableExtract->new(keep_html=>1, headers=>[qw(Date Articles Read)]); $te->parse($html); my @results; # Examine all matching tables for my $ts ($te->tables) { for my $row ($ts->rows) { my $sel1 = $row->[0]; $sel1 =~ /?(.*)<\/a>/; my $bm_det = "http://www.mybidmatch.com$1"; my $sel2 = $2; $sel2 =~ /.*, (\w{3} \d{1,2}), \d{4}$/; #my $date = $1; my $date = Time::Piece->strptime($1 . ' 2019', '%b %d %Y')->ymd; push(@results, [$bm_det, $date]); } } return \@results } sub write_csv ($rows, $date) { say "writing csv"; my $csv = Text::CSV->new({binary=>1, auto_diag=>1, eol => $/}); open my $fh, ">:encoding(utf8)", "/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch_$date.csv" or die "prime_contacts.csv: $!"; my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match); $csv->say ($fh, \@header); for my $row (@$rows) { $csv->say ($fh, $row); } close $fh or die "bidmatch.csv: $!"; } sub write_xlsx ($rows, $date) { say "writing xlsx"; open my $fh, ">>", '/home/superfly/project-conquer/Project Conquer/Miscellaneous/bidmatch.xlsx' or die "Open Failed: $!"; my $xlsx = Excel::Writer::XLSX->new($fh); my $worksheet = $xlsx->add_worksheet($date); my @header = qw(# Source Agency FSG Title Keywords BM_Link Opp_Link NAICS PoP NAICS_Match); $worksheet->write_row( 0, 0, \@header); $worksheet->write_col( 1, 0, $rows); $xlsx->close; } sub get_doc ($url) { say $url; my $ua = Mojo::UserAgent->new; my $res = $ua->get($url)->result->to_string; $res =~ /URL: /; return $1; } sub get_sam_json ($sam_link) { # https://beta.sam.gov/opp/57c1b3ccd33a4635b3a9decc5b015d0e/view # https://beta.sam.gov/api/prod/opps/v2/opportunities/57c1b3ccd33a4635b3a9decc5b015d0e my $ua = Mojo::UserAgent->new; my $res = $ua->get($sam_link)->result->json; return $res; }